diff --git hcatalog/bin/hcat hcatalog/bin/hcat index a81f144..6fb1a59 100644 --- hcatalog/bin/hcat +++ hcatalog/bin/hcat @@ -151,12 +151,12 @@ export HADOOP_OPTS=$HADOOP_OPTS # run it if [ "$debug" == "true" ]; then echo "Would run:" - echo "exec $HADOOP_PREFIX/bin/hadoop jar $HCAT_JAR org.apache.hcatalog.cli.HCatCli $remaining" + echo "exec $HADOOP_PREFIX/bin/hadoop jar $HCAT_JAR org.apache.hive.hcatalog.cli.HCatCli $remaining" echo "with HADOOP_CLASSPATH set to ($HADOOP_CLASSPATH)" echo "and HADOOP_OPTS set to ($HADOOP_OPTS)" elif [ "$dump_classpath" == "true" ]; then echo $HADOOP_CLASSPATH else - exec $HADOOP_PREFIX/bin/hadoop jar $HCAT_JAR org.apache.hcatalog.cli.HCatCli "$@" + exec $HADOOP_PREFIX/bin/hadoop jar $HCAT_JAR org.apache.hive.hcatalog.cli.HCatCli "$@" fi diff --git hcatalog/bin/hcat.py hcatalog/bin/hcat.py index 06e68b2..53fc387 100644 --- hcatalog/bin/hcat.py +++ hcatalog/bin/hcat.py @@ -129,9 +129,9 @@ cmdLine = os.path.join(os.environ['HADOOP_PREFIX'], "bin", hadoopcmd) if os.name == "posix": - cmd = [cmdLine, "jar", hcatJars[0], "org.apache.hcatalog.cli.HCatCli"] + sys.argv[1:len(sys.argv)] + cmd = [cmdLine, "jar", hcatJars[0], "org.apache.hive.hcatalog.cli.HCatCli"] + sys.argv[1:len(sys.argv)] else: - cmd = ["call", cmdLine, "jar", hcatJars[0], "org.apache.hcatalog.cli.HCatCli"] + sys.argv[1:len(sys.argv)] + cmd = ["call", cmdLine, "jar", hcatJars[0], "org.apache.hive.hcatalog.cli.HCatCli"] + sys.argv[1:len(sys.argv)] if debug == 1: diff --git hcatalog/build-support/ant/deploy.xml hcatalog/build-support/ant/deploy.xml index 3404154..1e44178 100644 --- hcatalog/build-support/ant/deploy.xml +++ hcatalog/build-support/ant/deploy.xml @@ -80,7 +80,7 @@ classpath="${path.to.basedir}/build/maven-ant-tasks-${maven-ant-tasks.version}.jar"/> - + diff --git hcatalog/build.xml hcatalog/build.xml index 668fd97..858f001 100644 --- hcatalog/build.xml +++ hcatalog/build.xml @@ -224,7 +224,7 @@ - + diff --git hcatalog/conf/proto-hive-site.xml 
hcatalog/conf/proto-hive-site.xml index 0b1a69d..7437484 100644 --- hcatalog/conf/proto-hive-site.xml +++ hcatalog/conf/proto-hive-site.xml @@ -96,7 +96,7 @@ hive.semantic.analyzer.factory.impl - org.apache.hcatalog.cli.HCatSemanticAnalyzerFactory + org.apache.hive.hcatalog.cli.HCatSemanticAnalyzerFactory controls which SemanticAnalyzerFactory implemenation class is used by CLI @@ -114,7 +114,7 @@ hive.security.authorization.manager - org.apache.hcatalog.security.StorageDelegationAuthorizationProvider + org.apache.hive.hcatalog.security.StorageDelegationAuthorizationProvider the hive client authorization manager class name. The user defined authorization class should implement interface org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider. HCatalog uses a model, where authorization checks are delegated to the storage layer (hdfs, hbase, ...). diff --git hcatalog/core/pom.xml hcatalog/core/pom.xml index 5cc4cc5..00e90df 100644 --- hcatalog/core/pom.xml +++ hcatalog/core/pom.xml @@ -22,14 +22,13 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"> - org.apache.hcatalog + org.apache.hive.hcatalog hcatalog 0.12.0-SNAPSHOT ../pom.xml 4.0.0 - org.apache.hcatalog hcatalog-core jar hcatalog-core diff --git hcatalog/core/src/main/java/org/apache/hcatalog/cli/HCatCli.java hcatalog/core/src/main/java/org/apache/hcatalog/cli/HCatCli.java deleted file mode 100644 index 0a80ab7..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/cli/HCatCli.java +++ /dev/null @@ -1,331 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.cli; - -import java.io.BufferedReader; -import java.io.FileNotFoundException; -import java.io.FileReader; -import java.io.IOException; -import java.io.OutputStream; -import java.io.PrintStream; -import java.io.PrintWriter; -import java.io.UnsupportedEncodingException; -import java.util.ArrayList; -import java.util.Properties; - -import org.apache.commons.cli.CommandLine; -import org.apache.commons.cli.GnuParser; -import org.apache.commons.cli.HelpFormatter; -import org.apache.commons.cli.Option; -import org.apache.commons.cli.OptionBuilder; -import org.apache.commons.cli.Options; -import org.apache.commons.cli.ParseException; -import org.apache.commons.cli.Parser; -import org.apache.commons.lang.StringUtils; -import org.apache.hadoop.fs.permission.FsPermission; -import org.apache.hadoop.hive.cli.CliSessionState; -import org.apache.hadoop.hive.common.LogUtils; -import org.apache.hadoop.hive.common.LogUtils.LogInitializationException; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.apache.hadoop.hive.ql.CommandNeedRetryException; -import org.apache.hadoop.hive.ql.processors.DfsProcessor; -import org.apache.hadoop.hive.ql.processors.SetProcessor; -import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzer; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.common.HCatUtil; - -public class HCatCli { - - @SuppressWarnings("static-access") - public static void 
main(String[] args) { - - try { - LogUtils.initHiveLog4j(); - } catch (LogInitializationException e) { - - } - - CliSessionState ss = new CliSessionState(new HiveConf(SessionState.class)); - ss.in = System.in; - try { - ss.out = new PrintStream(System.out, true, "UTF-8"); - ss.err = new PrintStream(System.err, true, "UTF-8"); - } catch (UnsupportedEncodingException e) { - System.exit(1); - } - - HiveConf conf = ss.getConf(); - - HiveConf.setVar(conf, ConfVars.SEMANTIC_ANALYZER_HOOK, HCatSemanticAnalyzer.class.getName()); - - SessionState.start(ss); - - Options options = new Options(); - - // -e 'quoted-query-string' - options.addOption(OptionBuilder - .hasArg() - .withArgName("exec") - .withDescription("hcat command given from command line") - .create('e')); - - // -f - options.addOption(OptionBuilder - .hasArg() - .withArgName("file") - .withDescription("hcat commands in file") - .create('f')); - - // -g - options.addOption(OptionBuilder - .hasArg(). - withArgName("group"). - withDescription("group for the db/table specified in CREATE statement"). 
- create('g')); - - // -p - options.addOption(OptionBuilder - .hasArg() - .withArgName("perms") - .withDescription("permissions for the db/table specified in CREATE statement") - .create('p')); - - // -D - options.addOption(OptionBuilder - .hasArgs(2) - .withArgName("property=value") - .withValueSeparator() - .withDescription("use hadoop value for given property") - .create('D')); - - // [-h|--help] - options.addOption(new Option("h", "help", false, "Print help information")); - - Parser parser = new GnuParser(); - CommandLine cmdLine = null; - - try { - cmdLine = parser.parse(options, args); - - } catch (ParseException e) { - printUsage(options, ss.err); - System.exit(1); - } - // -e - String execString = (String) cmdLine.getOptionValue('e'); - // -f - String fileName = (String) cmdLine.getOptionValue('f'); - // -h - if (cmdLine.hasOption('h')) { - printUsage(options, ss.out); - System.exit(0); - } - - if (execString != null && fileName != null) { - ss.err.println("The '-e' and '-f' options cannot be specified simultaneously"); - printUsage(options, ss.err); - System.exit(1); - } - - // -p - String perms = (String) cmdLine.getOptionValue('p'); - if (perms != null) { - validatePermissions(ss, conf, perms); - } - - // -g - String grp = (String) cmdLine.getOptionValue('g'); - if (grp != null) { - conf.set(HCatConstants.HCAT_GROUP, grp); - } - - // -D - setConfProperties(conf, cmdLine.getOptionProperties("D")); - - if (execString != null) { - System.exit(processLine(execString)); - } - - try { - if (fileName != null) { - System.exit(processFile(fileName)); - } - } catch (FileNotFoundException e) { - ss.err.println("Input file not found. (" + e.getMessage() + ")"); - System.exit(1); - } catch (IOException e) { - ss.err.println("Could not open input file for reading. 
(" + e.getMessage() + ")"); - System.exit(1); - } - - // -h - printUsage(options, ss.err); - System.exit(1); - } - - private static void setConfProperties(HiveConf conf, Properties props) { - for (java.util.Map.Entry e : props.entrySet()) - conf.set((String) e.getKey(), (String) e.getValue()); - } - - private static int processLine(String line) { - int ret = 0; - - String command = ""; - for (String oneCmd : line.split(";")) { - - if (StringUtils.endsWith(oneCmd, "\\")) { - command += StringUtils.chop(oneCmd) + ";"; - continue; - } else { - command += oneCmd; - } - if (StringUtils.isBlank(command)) { - continue; - } - - ret = processCmd(command); - command = ""; - } - return ret; - } - - private static int processFile(String fileName) throws IOException { - FileReader fileReader = null; - BufferedReader reader = null; - try { - fileReader = new FileReader(fileName); - reader = new BufferedReader(fileReader); - String line; - StringBuilder qsb = new StringBuilder(); - - while ((line = reader.readLine()) != null) { - qsb.append(line + "\n"); - } - - return (processLine(qsb.toString())); - } finally { - if (fileReader != null) { - fileReader.close(); - } - if (reader != null) { - reader.close(); - } - } - } - - private static int processCmd(String cmd) { - - SessionState ss = SessionState.get(); - long start = System.currentTimeMillis(); - - cmd = cmd.trim(); - String firstToken = cmd.split("\\s+")[0].trim(); - - if (firstToken.equalsIgnoreCase("set")) { - return new SetProcessor().run(cmd.substring(firstToken.length()).trim()).getResponseCode(); - } else if (firstToken.equalsIgnoreCase("dfs")) { - return new DfsProcessor(ss.getConf()).run(cmd.substring(firstToken.length()).trim()).getResponseCode(); - } - - HCatDriver driver = new HCatDriver(); - - int ret = driver.run(cmd).getResponseCode(); - - if (ret != 0) { - driver.close(); - System.exit(ret); - } - - ArrayList res = new ArrayList(); - try { - while (driver.getResults(res)) { - for (String r : res) { - 
ss.out.println(r); - } - res.clear(); - } - } catch (IOException e) { - ss.err.println("Failed with exception " + e.getClass().getName() + ":" - + e.getMessage() + "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e)); - ret = 1; - } catch (CommandNeedRetryException e) { - ss.err.println("Failed with exception " + e.getClass().getName() + ":" - + e.getMessage() + "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e)); - ret = 1; - } - - int cret = driver.close(); - if (ret == 0) { - ret = cret; - } - - long end = System.currentTimeMillis(); - if (end > start) { - double timeTaken = (end - start) / 1000.0; - ss.err.println("Time taken: " + timeTaken + " seconds"); - } - return ret; - } - - private static void printUsage(Options options, OutputStream os) { - PrintWriter pw = new PrintWriter(os); - new HelpFormatter().printHelp(pw, 2 * HelpFormatter.DEFAULT_WIDTH, - "hcat { -e \"\" | -f \"\" } [ -g \"\" ] [ -p \"\" ] [ -D\"=\" ]", - null, options, HelpFormatter.DEFAULT_LEFT_PAD, HelpFormatter.DEFAULT_DESC_PAD, - null, false); - pw.flush(); - } - - private static void validatePermissions(CliSessionState ss, HiveConf conf, String perms) { - perms = perms.trim(); - FsPermission fp = null; - - if (perms.matches("^\\s*([r,w,x,-]{9})\\s*$")) { - fp = FsPermission.valueOf("d" + perms); - } else if (perms.matches("^\\s*([0-7]{3})\\s*$")) { - fp = new FsPermission(Short.decode("0" + perms)); - } else { - ss.err.println("Invalid permission specification: " + perms); - System.exit(1); - } - - if (!HCatUtil.validateMorePermissive(fp.getUserAction(), fp.getGroupAction())) { - ss.err.println("Invalid permission specification: " + perms + " : user permissions must be more permissive than group permission "); - System.exit(1); - } - if (!HCatUtil.validateMorePermissive(fp.getGroupAction(), fp.getOtherAction())) { - ss.err.println("Invalid permission specification: " + perms + " : group permissions must be more permissive than other permission "); - 
System.exit(1); - } - if ((!HCatUtil.validateExecuteBitPresentIfReadOrWrite(fp.getUserAction())) || - (!HCatUtil.validateExecuteBitPresentIfReadOrWrite(fp.getGroupAction())) || - (!HCatUtil.validateExecuteBitPresentIfReadOrWrite(fp.getOtherAction()))) { - ss.err.println("Invalid permission specification: " + perms + " : permissions must have execute permissions if read or write permissions are specified "); - System.exit(1); - } - - conf.set(HCatConstants.HCAT_PERMS, "d" + fp.toString()); - - } - - -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/cli/HCatDriver.java hcatalog/core/src/main/java/org/apache/hcatalog/cli/HCatDriver.java deleted file mode 100644 index a3f26fd..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/cli/HCatDriver.java +++ /dev/null @@ -1,143 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.hcatalog.cli; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.permission.FsPermission; -import org.apache.hadoop.hive.metastore.MetaStoreUtils; -import org.apache.hadoop.hive.metastore.Warehouse; -import org.apache.hadoop.hive.ql.CommandNeedRetryException; -import org.apache.hadoop.hive.ql.Driver; -import org.apache.hadoop.hive.ql.metadata.Hive; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.metadata.Table; -import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; -import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hcatalog.common.HCatConstants; - -public class HCatDriver extends Driver { - - @Override - public CommandProcessorResponse run(String command) { - - CommandProcessorResponse cpr = null; - try { - cpr = super.run(command); - } catch (CommandNeedRetryException e) { - return new CommandProcessorResponse(-1, e.toString(), ""); - } - - SessionState ss = SessionState.get(); - - if (cpr.getResponseCode() == 0) { - // Only attempt to do this, if cmd was successful. 
- int rc = setFSPermsNGrp(ss); - cpr = new CommandProcessorResponse(rc); - } - // reset conf vars - ss.getConf().set(HCatConstants.HCAT_CREATE_DB_NAME, ""); - ss.getConf().set(HCatConstants.HCAT_CREATE_TBL_NAME, ""); - - return cpr; - } - - private int setFSPermsNGrp(SessionState ss) { - - Configuration conf = ss.getConf(); - - String tblName = conf.get(HCatConstants.HCAT_CREATE_TBL_NAME, ""); - if (tblName.isEmpty()) { - tblName = conf.get("import.destination.table", ""); - conf.set("import.destination.table", ""); - } - String dbName = conf.get(HCatConstants.HCAT_CREATE_DB_NAME, ""); - String grp = conf.get(HCatConstants.HCAT_GROUP, null); - String permsStr = conf.get(HCatConstants.HCAT_PERMS, null); - - if (tblName.isEmpty() && dbName.isEmpty()) { - // it wasn't create db/table - return 0; - } - - if (null == grp && null == permsStr) { - // there were no grp and perms to begin with. - return 0; - } - - FsPermission perms = FsPermission.valueOf(permsStr); - - if (!tblName.isEmpty()) { - Hive db = null; - try { - db = Hive.get(); - Table tbl = db.getTable(tblName); - Path tblPath = tbl.getPath(); - - FileSystem fs = tblPath.getFileSystem(conf); - if (null != perms) { - fs.setPermission(tblPath, perms); - } - if (null != grp) { - fs.setOwner(tblPath, null, grp); - } - return 0; - - } catch (Exception e) { - ss.err.println(String.format("Failed to set permissions/groups on TABLE: <%s> %s", tblName, e.getMessage())); - try { // We need to drop the table. - if (null != db) { - db.dropTable(tblName); - } - } catch (HiveException he) { - ss.err.println(String.format("Failed to drop TABLE <%s> after failing to set permissions/groups on it. %s", tblName, e.getMessage())); - } - return 1; - } - } else { - // looks like a db operation - if (dbName.isEmpty() || dbName.equals(MetaStoreUtils.DEFAULT_DATABASE_NAME)) { - // We dont set perms or groups for default dir. 
- return 0; - } else { - try { - Hive db = Hive.get(); - Path dbPath = new Warehouse(conf).getDatabasePath(db.getDatabase(dbName)); - FileSystem fs = dbPath.getFileSystem(conf); - if (perms != null) { - fs.setPermission(dbPath, perms); - } - if (null != grp) { - fs.setOwner(dbPath, null, grp); - } - return 0; - } catch (Exception e) { - ss.err.println(String.format("Failed to set permissions and/or group on DB: <%s> %s", dbName, e.getMessage())); - try { - Hive.get().dropDatabase(dbName); - } catch (Exception e1) { - ss.err.println(String.format("Failed to drop DB <%s> after failing to set permissions/group on it. %s", dbName, e1.getMessage())); - } - return 1; - } - } - } - } -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/cli/SemanticAnalysis/CreateDatabaseHook.java hcatalog/core/src/main/java/org/apache/hcatalog/cli/SemanticAnalysis/CreateDatabaseHook.java deleted file mode 100644 index d67205f..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/cli/SemanticAnalysis/CreateDatabaseHook.java +++ /dev/null @@ -1,96 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.hcatalog.cli.SemanticAnalysis; - -import java.io.Serializable; -import java.util.List; - -import org.apache.hadoop.hive.metastore.api.Database; -import org.apache.hadoop.hive.ql.exec.Task; -import org.apache.hadoop.hive.ql.metadata.Hive; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.parse.ASTNode; -import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer; -import org.apache.hadoop.hive.ql.parse.HiveParser; -import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContext; -import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.hive.ql.plan.CreateDatabaseDesc; -import org.apache.hadoop.hive.ql.plan.DDLWork; -import org.apache.hadoop.hive.ql.security.authorization.Privilege; -import org.apache.hcatalog.common.HCatConstants; - -final class CreateDatabaseHook extends HCatSemanticAnalyzerBase { - - String databaseName; - - @Override - public ASTNode preAnalyze(HiveSemanticAnalyzerHookContext context, ASTNode ast) - throws SemanticException { - - Hive db; - try { - db = context.getHive(); - } catch (HiveException e) { - throw new SemanticException("Couldn't get Hive DB instance in semantic analysis phase.", e); - } - - // Analyze and create tbl properties object - int numCh = ast.getChildCount(); - - databaseName = BaseSemanticAnalyzer.getUnescapedName((ASTNode) ast.getChild(0)); - - for (int num = 1; num < numCh; num++) { - ASTNode child = (ASTNode) ast.getChild(num); - - switch (child.getToken().getType()) { - - case HiveParser.TOK_IFNOTEXISTS: - try { - List dbs = db.getDatabasesByPattern(databaseName); - if (dbs != null && dbs.size() > 0) { // db exists - return ast; - } - } catch (HiveException e) { - throw new SemanticException(e); - } - break; - } - } - - return ast; - } - - @Override - public void postAnalyze(HiveSemanticAnalyzerHookContext context, - List> rootTasks) throws SemanticException { - context.getConf().set(HCatConstants.HCAT_CREATE_DB_NAME, 
databaseName); - super.postAnalyze(context, rootTasks); - } - - @Override - protected void authorizeDDLWork(HiveSemanticAnalyzerHookContext context, - Hive hive, DDLWork work) throws HiveException { - CreateDatabaseDesc createDb = work.getCreateDatabaseDesc(); - if (createDb != null) { - Database db = new Database(createDb.getName(), createDb.getComment(), - createDb.getLocationUri(), createDb.getDatabaseProperties()); - authorize(db, Privilege.CREATE); - } - } -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/cli/SemanticAnalysis/CreateTableHook.java hcatalog/core/src/main/java/org/apache/hcatalog/cli/SemanticAnalysis/CreateTableHook.java deleted file mode 100644 index 663b6c9..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/cli/SemanticAnalysis/CreateTableHook.java +++ /dev/null @@ -1,245 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.hcatalog.cli.SemanticAnalysis; - -import java.io.IOException; -import java.io.Serializable; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import org.apache.commons.lang.StringUtils; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.ql.exec.DDLTask; -import org.apache.hadoop.hive.ql.exec.Task; -import org.apache.hadoop.hive.ql.io.RCFileInputFormat; -import org.apache.hadoop.hive.ql.io.RCFileOutputFormat; -import org.apache.hadoop.hive.ql.metadata.Hive; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.metadata.Table; -import org.apache.hadoop.hive.ql.parse.ASTNode; -import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer; -import org.apache.hadoop.hive.ql.parse.HiveParser; -import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContext; -import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.hive.ql.plan.CreateTableDesc; -import org.apache.hadoop.hive.ql.security.authorization.Privilege; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.common.HCatUtil; -import org.apache.hcatalog.mapreduce.HCatStorageHandler; - -final class CreateTableHook extends HCatSemanticAnalyzerBase { - - private String tableName; - - @Override - public ASTNode preAnalyze(HiveSemanticAnalyzerHookContext context, - ASTNode ast) throws SemanticException { - - Hive db; - try { - db = context.getHive(); - } catch (HiveException e) { - throw new SemanticException( - "Couldn't get Hive DB instance in semantic analysis phase.", - e); - } - - // Analyze and create tbl properties object - int numCh = ast.getChildCount(); - - String inputFormat = null, outputFormat = null; - tableName = BaseSemanticAnalyzer.getUnescapedName((ASTNode) ast - .getChild(0)); - boolean likeTable = false; - - for (int num = 1; num < numCh; 
num++) { - ASTNode child = (ASTNode) ast.getChild(num); - - switch (child.getToken().getType()) { - - case HiveParser.TOK_QUERY: // CTAS - throw new SemanticException( - "Operation not supported. Create table as " + - "Select is not a valid operation."); - - case HiveParser.TOK_TABLEBUCKETS: - break; - - case HiveParser.TOK_TBLSEQUENCEFILE: - inputFormat = HCatConstants.SEQUENCEFILE_INPUT; - outputFormat = HCatConstants.SEQUENCEFILE_OUTPUT; - break; - - case HiveParser.TOK_TBLTEXTFILE: - inputFormat = org.apache.hadoop.mapred.TextInputFormat.class.getName(); - outputFormat = org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat.class.getName(); - - break; - - case HiveParser.TOK_LIKETABLE: - likeTable = true; - break; - - case HiveParser.TOK_IFNOTEXISTS: - try { - List tables = db.getTablesByPattern(tableName); - if (tables != null && tables.size() > 0) { // table - // exists - return ast; - } - } catch (HiveException e) { - throw new SemanticException(e); - } - break; - - case HiveParser.TOK_TABLEPARTCOLS: - List partCols = BaseSemanticAnalyzer - .getColumns((ASTNode) child.getChild(0), false); - for (FieldSchema fs : partCols) { - if (!fs.getType().equalsIgnoreCase("string")) { - throw new SemanticException( - "Operation not supported. HCatalog only " + - "supports partition columns of type string. " - + "For column: " - + fs.getName() - + " Found type: " + fs.getType()); - } - } - break; - - case HiveParser.TOK_STORAGEHANDLER: - String storageHandler = BaseSemanticAnalyzer - .unescapeSQLString(child.getChild(0).getText()); - if (org.apache.commons.lang.StringUtils - .isNotEmpty(storageHandler)) { - return ast; - } - - break; - - case HiveParser.TOK_TABLEFILEFORMAT: - if (child.getChildCount() < 2) { - throw new SemanticException( - "Incomplete specification of File Format. 
" + - "You must provide InputFormat, OutputFormat."); - } - inputFormat = BaseSemanticAnalyzer.unescapeSQLString(child - .getChild(0).getText()); - outputFormat = BaseSemanticAnalyzer.unescapeSQLString(child - .getChild(1).getText()); - break; - - case HiveParser.TOK_TBLRCFILE: - inputFormat = RCFileInputFormat.class.getName(); - outputFormat = RCFileOutputFormat.class.getName(); - break; - - } - } - - if (!likeTable && (inputFormat == null || outputFormat == null)) { - throw new SemanticException( - "STORED AS specification is either incomplete or incorrect."); - } - - - return ast; - } - - @Override - public void postAnalyze(HiveSemanticAnalyzerHookContext context, - List> rootTasks) - throws SemanticException { - - if (rootTasks.size() == 0) { - // There will be no DDL task created in case if its CREATE TABLE IF - // NOT EXISTS - return; - } - CreateTableDesc desc = ((DDLTask) rootTasks.get(rootTasks.size() - 1)) - .getWork().getCreateTblDesc(); - if (desc == null) { - // Desc will be null if its CREATE TABLE LIKE. Desc will be - // contained in CreateTableLikeDesc. Currently, HCat disallows CTLT in - // pre-hook. So, desc can never be null. - return; - } - Map tblProps = desc.getTblProps(); - if (tblProps == null) { - // tblProps will be null if user didnt use tblprops in his CREATE - // TABLE cmd. - tblProps = new HashMap(); - - } - - // first check if we will allow the user to create table. - String storageHandler = desc.getStorageHandler(); - if (StringUtils.isEmpty(storageHandler)) { - } else { - try { - HCatStorageHandler storageHandlerInst = HCatUtil - .getStorageHandler(context.getConf(), - desc.getStorageHandler(), - desc.getSerName(), - desc.getInputFormat(), - desc.getOutputFormat()); - //Authorization checks are performed by the storageHandler.getAuthorizationProvider(), if - //StorageDelegationAuthorizationProvider is used. 
- } catch (IOException e) { - throw new SemanticException(e); - } - } - - if (desc != null) { - try { - Table table = context.getHive().newTable(desc.getTableName()); - if (desc.getLocation() != null) { - table.setDataLocation(new Path(desc.getLocation()).toUri()); - } - if (desc.getStorageHandler() != null) { - table.setProperty( - org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE, - desc.getStorageHandler()); - } - for (Map.Entry prop : tblProps.entrySet()) { - table.setProperty(prop.getKey(), prop.getValue()); - } - for (Map.Entry prop : desc.getSerdeProps().entrySet()) { - table.setSerdeParam(prop.getKey(), prop.getValue()); - } - //TODO: set other Table properties as needed - - //authorize against the table operation so that location permissions can be checked if any - - if (HiveConf.getBoolVar(context.getConf(), - HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED)) { - authorize(table, Privilege.CREATE); - } - } catch (HiveException ex) { - throw new SemanticException(ex); - } - } - - desc.setTblProps(tblProps); - context.getConf().set(HCatConstants.HCAT_CREATE_TBL_NAME, tableName); - } -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/cli/SemanticAnalysis/HCatSemanticAnalyzer.java hcatalog/core/src/main/java/org/apache/hcatalog/cli/SemanticAnalysis/HCatSemanticAnalyzer.java deleted file mode 100644 index 00a9fbe..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/cli/SemanticAnalysis/HCatSemanticAnalyzer.java +++ /dev/null @@ -1,375 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.cli.SemanticAnalysis; - -import java.io.Serializable; -import java.util.List; - -import org.apache.hadoop.hive.metastore.api.Database; -import org.apache.hadoop.hive.ql.exec.Task; -import org.apache.hadoop.hive.ql.metadata.Hive; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.metadata.Partition; -import org.apache.hadoop.hive.ql.metadata.Table; -import org.apache.hadoop.hive.ql.parse.ASTNode; -import org.apache.hadoop.hive.ql.parse.AbstractSemanticAnalyzerHook; -import org.apache.hadoop.hive.ql.parse.HiveParser; -import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContext; -import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.hive.ql.plan.AlterTableDesc; -import org.apache.hadoop.hive.ql.plan.DDLWork; -import org.apache.hadoop.hive.ql.plan.DescDatabaseDesc; -import org.apache.hadoop.hive.ql.plan.DescTableDesc; -import org.apache.hadoop.hive.ql.plan.DropDatabaseDesc; -import org.apache.hadoop.hive.ql.plan.DropTableDesc; -import org.apache.hadoop.hive.ql.plan.HiveOperation; -import org.apache.hadoop.hive.ql.plan.PartitionSpec; -import org.apache.hadoop.hive.ql.plan.ShowDatabasesDesc; -import org.apache.hadoop.hive.ql.plan.ShowPartitionsDesc; -import org.apache.hadoop.hive.ql.plan.ShowTableStatusDesc; -import org.apache.hadoop.hive.ql.plan.ShowTablesDesc; -import org.apache.hadoop.hive.ql.plan.SwitchDatabaseDesc; -import org.apache.hadoop.hive.ql.security.authorization.Privilege; -import org.apache.hcatalog.common.ErrorType; -import 
org.apache.hcatalog.common.HCatException; - -public class HCatSemanticAnalyzer extends HCatSemanticAnalyzerBase { - - private AbstractSemanticAnalyzerHook hook; - private ASTNode ast; - - - @Override - public ASTNode preAnalyze(HiveSemanticAnalyzerHookContext context, ASTNode ast) - throws SemanticException { - - this.ast = ast; - switch (ast.getToken().getType()) { - - // HCat wants to intercept following tokens and special-handle them. - case HiveParser.TOK_CREATETABLE: - hook = new CreateTableHook(); - return hook.preAnalyze(context, ast); - - case HiveParser.TOK_CREATEDATABASE: - hook = new CreateDatabaseHook(); - return hook.preAnalyze(context, ast); - - case HiveParser.TOK_ALTERTABLE_PARTITION: - if (((ASTNode) ast.getChild(1)).getToken().getType() == HiveParser.TOK_ALTERTABLE_FILEFORMAT) { - return ast; - } else if (((ASTNode) ast.getChild(1)).getToken().getType() == HiveParser.TOK_ALTERTABLE_ALTERPARTS_MERGEFILES) { - // unsupported - throw new SemanticException("Operation not supported."); - } else { - return ast; - } - - // HCat will allow these operations to be performed. - // Database DDL - case HiveParser.TOK_SHOWDATABASES: - case HiveParser.TOK_DROPDATABASE: - case HiveParser.TOK_SWITCHDATABASE: - case HiveParser.TOK_DESCDATABASE: - case HiveParser.TOK_ALTERDATABASE_PROPERTIES: - - // Index DDL - case HiveParser.TOK_ALTERINDEX_PROPERTIES: - case HiveParser.TOK_CREATEINDEX: - case HiveParser.TOK_DROPINDEX: - case HiveParser.TOK_SHOWINDEXES: - - // View DDL - // "alter view add partition" does not work because of the nature of implementation - // of the DDL in hive. Hive will internally invoke another Driver on the select statement, - // and HCat does not let "select" statement through. I cannot find a way to get around it - // without modifying hive code. So just leave it unsupported. 
- //case HiveParser.TOK_ALTERVIEW_ADDPARTS: - case HiveParser.TOK_ALTERVIEW_DROPPARTS: - case HiveParser.TOK_ALTERVIEW_PROPERTIES: - case HiveParser.TOK_ALTERVIEW_RENAME: - case HiveParser.TOK_CREATEVIEW: - case HiveParser.TOK_DROPVIEW: - - // Authorization DDL - case HiveParser.TOK_CREATEROLE: - case HiveParser.TOK_DROPROLE: - case HiveParser.TOK_GRANT_ROLE: - case HiveParser.TOK_GRANT_WITH_OPTION: - case HiveParser.TOK_GRANT: - case HiveParser.TOK_REVOKE_ROLE: - case HiveParser.TOK_REVOKE: - case HiveParser.TOK_SHOW_GRANT: - case HiveParser.TOK_SHOW_ROLE_GRANT: - - // Misc DDL - case HiveParser.TOK_LOCKTABLE: - case HiveParser.TOK_UNLOCKTABLE: - case HiveParser.TOK_SHOWLOCKS: - case HiveParser.TOK_DESCFUNCTION: - case HiveParser.TOK_SHOWFUNCTIONS: - case HiveParser.TOK_EXPLAIN: - - // Table DDL - case HiveParser.TOK_ALTERTABLE_ADDPARTS: - case HiveParser.TOK_ALTERTABLE_ADDCOLS: - case HiveParser.TOK_ALTERTABLE_CHANGECOL_AFTER_POSITION: - case HiveParser.TOK_ALTERTABLE_SERDEPROPERTIES: - case HiveParser.TOK_ALTERTABLE_CLUSTER_SORT: - case HiveParser.TOK_ALTERTABLE_DROPPARTS: - case HiveParser.TOK_ALTERTABLE_PROPERTIES: - case HiveParser.TOK_ALTERTABLE_RENAME: - case HiveParser.TOK_ALTERTABLE_RENAMECOL: - case HiveParser.TOK_ALTERTABLE_REPLACECOLS: - case HiveParser.TOK_ALTERTABLE_SERIALIZER: - case HiveParser.TOK_ALTERTABLE_TOUCH: - case HiveParser.TOK_DESCTABLE: - case HiveParser.TOK_DROPTABLE: - case HiveParser.TOK_SHOW_TABLESTATUS: - case HiveParser.TOK_SHOWPARTITIONS: - case HiveParser.TOK_SHOWTABLES: - return ast; - - // In all other cases, throw an exception. Its a white-list of allowed operations. 
- default: - throw new SemanticException("Operation not supported."); - - } - } - - @Override - public void postAnalyze(HiveSemanticAnalyzerHookContext context, - List> rootTasks) throws SemanticException { - - try { - - switch (ast.getToken().getType()) { - - case HiveParser.TOK_CREATETABLE: - case HiveParser.TOK_CREATEDATABASE: - case HiveParser.TOK_ALTERTABLE_PARTITION: - - // HCat will allow these operations to be performed. - // Database DDL - case HiveParser.TOK_SHOWDATABASES: - case HiveParser.TOK_DROPDATABASE: - case HiveParser.TOK_SWITCHDATABASE: - case HiveParser.TOK_DESCDATABASE: - case HiveParser.TOK_ALTERDATABASE_PROPERTIES: - - // Index DDL - case HiveParser.TOK_ALTERINDEX_PROPERTIES: - case HiveParser.TOK_CREATEINDEX: - case HiveParser.TOK_DROPINDEX: - case HiveParser.TOK_SHOWINDEXES: - - // View DDL - //case HiveParser.TOK_ALTERVIEW_ADDPARTS: - case HiveParser.TOK_ALTERVIEW_DROPPARTS: - case HiveParser.TOK_ALTERVIEW_PROPERTIES: - case HiveParser.TOK_ALTERVIEW_RENAME: - case HiveParser.TOK_CREATEVIEW: - case HiveParser.TOK_DROPVIEW: - - // Authorization DDL - case HiveParser.TOK_CREATEROLE: - case HiveParser.TOK_DROPROLE: - case HiveParser.TOK_GRANT_ROLE: - case HiveParser.TOK_GRANT_WITH_OPTION: - case HiveParser.TOK_GRANT: - case HiveParser.TOK_REVOKE_ROLE: - case HiveParser.TOK_REVOKE: - case HiveParser.TOK_SHOW_GRANT: - case HiveParser.TOK_SHOW_ROLE_GRANT: - - // Misc DDL - case HiveParser.TOK_LOCKTABLE: - case HiveParser.TOK_UNLOCKTABLE: - case HiveParser.TOK_SHOWLOCKS: - case HiveParser.TOK_DESCFUNCTION: - case HiveParser.TOK_SHOWFUNCTIONS: - case HiveParser.TOK_EXPLAIN: - - // Table DDL - case HiveParser.TOK_ALTERTABLE_ADDPARTS: - case HiveParser.TOK_ALTERTABLE_ADDCOLS: - case HiveParser.TOK_ALTERTABLE_CHANGECOL_AFTER_POSITION: - case HiveParser.TOK_ALTERTABLE_SERDEPROPERTIES: - case HiveParser.TOK_ALTERTABLE_CLUSTER_SORT: - case HiveParser.TOK_ALTERTABLE_DROPPARTS: - case HiveParser.TOK_ALTERTABLE_PROPERTIES: - case 
HiveParser.TOK_ALTERTABLE_RENAME: - case HiveParser.TOK_ALTERTABLE_RENAMECOL: - case HiveParser.TOK_ALTERTABLE_REPLACECOLS: - case HiveParser.TOK_ALTERTABLE_SERIALIZER: - case HiveParser.TOK_ALTERTABLE_TOUCH: - case HiveParser.TOK_DESCTABLE: - case HiveParser.TOK_DROPTABLE: - case HiveParser.TOK_SHOW_TABLESTATUS: - case HiveParser.TOK_SHOWPARTITIONS: - case HiveParser.TOK_SHOWTABLES: - break; - - default: - throw new HCatException(ErrorType.ERROR_INTERNAL_EXCEPTION, "Unexpected token: " + ast.getToken()); - } - - authorizeDDL(context, rootTasks); - - } catch (HCatException e) { - throw new SemanticException(e); - } catch (HiveException e) { - throw new SemanticException(e); - } - - if (hook != null) { - hook.postAnalyze(context, rootTasks); - } - } - - private String extractTableName(String compoundName) { - /* - * the table name can potentially be a dot-format one with column names - * specified as part of the table name. e.g. a.b.c where b is a column in - * a and c is a field of the object/column b etc. For authorization - * purposes, we should use only the first part of the dotted name format. - * - */ - - String[] words = compoundName.split("\\."); - return words[0]; - } - - @Override - protected void authorizeDDLWork(HiveSemanticAnalyzerHookContext cntxt, Hive hive, DDLWork work) - throws HiveException { - // DB opereations, none of them are enforced by Hive right now. 
- - ShowDatabasesDesc showDatabases = work.getShowDatabasesDesc(); - if (showDatabases != null) { - authorize(HiveOperation.SHOWDATABASES.getInputRequiredPrivileges(), - HiveOperation.SHOWDATABASES.getOutputRequiredPrivileges()); - } - - DropDatabaseDesc dropDb = work.getDropDatabaseDesc(); - if (dropDb != null) { - Database db = cntxt.getHive().getDatabase(dropDb.getDatabaseName()); - authorize(db, Privilege.DROP); - } - - DescDatabaseDesc descDb = work.getDescDatabaseDesc(); - if (descDb != null) { - Database db = cntxt.getHive().getDatabase(descDb.getDatabaseName()); - authorize(db, Privilege.SELECT); - } - - SwitchDatabaseDesc switchDb = work.getSwitchDatabaseDesc(); - if (switchDb != null) { - Database db = cntxt.getHive().getDatabase(switchDb.getDatabaseName()); - authorize(db, Privilege.SELECT); - } - - ShowTablesDesc showTables = work.getShowTblsDesc(); - if (showTables != null) { - String dbName = showTables.getDbName() == null ? cntxt.getHive().getCurrentDatabase() - : showTables.getDbName(); - authorize(cntxt.getHive().getDatabase(dbName), Privilege.SELECT); - } - - ShowTableStatusDesc showTableStatus = work.getShowTblStatusDesc(); - if (showTableStatus != null) { - String dbName = showTableStatus.getDbName() == null ? cntxt.getHive().getCurrentDatabase() - : showTableStatus.getDbName(); - authorize(cntxt.getHive().getDatabase(dbName), Privilege.SELECT); - } - - // TODO: add alter database support in HCat - - // Table operations. - - DropTableDesc dropTable = work.getDropTblDesc(); - if (dropTable != null) { - if (dropTable.getPartSpecs() == null) { - // drop table is already enforced by Hive. We only check for table level location even if the - // table is partitioned. 
- } else { - //this is actually a ALTER TABLE DROP PARITITION statement - for (PartitionSpec partSpec : dropTable.getPartSpecs()) { - // partitions are not added as write entries in drop partitions in Hive - Table table = hive.getTable(hive.getCurrentDatabase(), dropTable.getTableName()); - List partitions = null; - try { - partitions = hive.getPartitionsByFilter(table, partSpec.toString()); - } catch (Exception e) { - throw new HiveException(e); - } - - for (Partition part : partitions) { - authorize(part, Privilege.DROP); - } - } - } - } - - AlterTableDesc alterTable = work.getAlterTblDesc(); - if (alterTable != null) { - Table table = hive.getTable(hive.getCurrentDatabase(), alterTable.getOldName(), false); - - Partition part = null; - if (alterTable.getPartSpec() != null) { - part = hive.getPartition(table, alterTable.getPartSpec(), false); - } - - String newLocation = alterTable.getNewLocation(); - - /* Hcat requires ALTER_DATA privileges for ALTER TABLE LOCATION statements - * for the old table/partition location and the new location. - */ - if (alterTable.getOp() == AlterTableDesc.AlterTableTypes.ALTERLOCATION) { - if (part != null) { - authorize(part, Privilege.ALTER_DATA); // authorize for the old - // location, and new location - part.setLocation(newLocation); - authorize(part, Privilege.ALTER_DATA); - } else { - authorize(table, Privilege.ALTER_DATA); // authorize for the old - // location, and new location - table.getTTable().getSd().setLocation(newLocation); - authorize(table, Privilege.ALTER_DATA); - } - } - //other alter operations are already supported by Hive - } - - // we should be careful when authorizing table based on just the - // table name. 
If columns have separate authorization domain, it - // must be honored - DescTableDesc descTable = work.getDescTblDesc(); - if (descTable != null) { - String tableName = extractTableName(descTable.getTableName()); - authorizeTable(cntxt.getHive(), tableName, Privilege.SELECT); - } - - ShowPartitionsDesc showParts = work.getShowPartsDesc(); - if (showParts != null) { - String tableName = extractTableName(showParts.getTabName()); - authorizeTable(cntxt.getHive(), tableName, Privilege.SELECT); - } - } -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/cli/SemanticAnalysis/HCatSemanticAnalyzerBase.java hcatalog/core/src/main/java/org/apache/hcatalog/cli/SemanticAnalysis/HCatSemanticAnalyzerBase.java deleted file mode 100644 index 4413467..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/cli/SemanticAnalysis/HCatSemanticAnalyzerBase.java +++ /dev/null @@ -1,179 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.cli.SemanticAnalysis; - -import java.io.Serializable; -import java.util.List; - -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.api.Database; -import org.apache.hadoop.hive.ql.exec.Task; -import org.apache.hadoop.hive.ql.metadata.AuthorizationException; -import org.apache.hadoop.hive.ql.metadata.Hive; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.metadata.InvalidTableException; -import org.apache.hadoop.hive.ql.metadata.Partition; -import org.apache.hadoop.hive.ql.metadata.Table; -import org.apache.hadoop.hive.ql.parse.AbstractSemanticAnalyzerHook; -import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContext; -import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.hive.ql.plan.DDLWork; -import org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider; -import org.apache.hadoop.hive.ql.security.authorization.Privilege; -import org.apache.hadoop.hive.ql.session.SessionState; - -/** - * Base class for HCatSemanticAnalyzer hooks. - */ -public class HCatSemanticAnalyzerBase extends AbstractSemanticAnalyzerHook { - - private HiveAuthorizationProvider authProvider; - - protected String getDbName(Hive hive, String dbName) { - return dbName == null ? hive.getCurrentDatabase() : dbName; - } - - public HiveAuthorizationProvider getAuthProvider() { - if (authProvider == null) { - authProvider = SessionState.get().getAuthorizer(); - } - - return authProvider; - } - - @Override - public void postAnalyze(HiveSemanticAnalyzerHookContext context, - List> rootTasks) throws SemanticException { - super.postAnalyze(context, rootTasks); - - //Authorize the operation. - authorizeDDL(context, rootTasks); - } - - /** - * Checks for the given rootTasks, and calls authorizeDDLWork() for each DDLWork to - * be authorized. The hooks should override this, or authorizeDDLWork to perform the - * actual authorization. 
- */ - /* - * Impl note: Hive provides authorization with it's own model, and calls the defined - * HiveAuthorizationProvider from Driver.doAuthorization(). However, HCat has to - * do additional calls to the auth provider to implement expected behavior for - * StorageDelegationAuthorizationProvider. This means, that the defined auth provider - * is called by both Hive and HCat. The following are missing from Hive's implementation, - * and when they are fixed in Hive, we can remove the HCat-specific auth checks. - * 1. CREATE DATABASE/TABLE, ADD PARTITION statements does not call - * HiveAuthorizationProvider.authorize() with the candidate objects, which means that - * we cannot do checks against defined LOCATION. - * 2. HiveOperation does not define sufficient Privileges for most of the operations, - * especially database operations. - * 3. For some of the operations, Hive SemanticAnalyzer does not add the changed - * object as a WriteEntity or ReadEntity. - * - * @see https://issues.apache.org/jira/browse/HCATALOG-244 - * @see https://issues.apache.org/jira/browse/HCATALOG-245 - */ - protected void authorizeDDL(HiveSemanticAnalyzerHookContext context, - List> rootTasks) throws SemanticException { - - if (!HiveConf.getBoolVar(context.getConf(), - HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED)) { - return; - } - - Hive hive; - try { - hive = context.getHive(); - - for (Task task : rootTasks) { - if (task.getWork() instanceof DDLWork) { - DDLWork work = (DDLWork) task.getWork(); - if (work != null) { - authorizeDDLWork(context, hive, work); - } - } - } - } catch (SemanticException ex) { - throw ex; - } catch (AuthorizationException ex) { - throw ex; - } catch (Exception ex) { - throw new SemanticException(ex); - } - } - - /** - * Authorized the given DDLWork. Does nothing by default. Override this - * and delegate to the relevant method in HiveAuthorizationProvider obtained by - * getAuthProvider(). 
- */ - protected void authorizeDDLWork(HiveSemanticAnalyzerHookContext context, - Hive hive, DDLWork work) throws HiveException { - } - - protected void authorize(Privilege[] inputPrivs, Privilege[] outputPrivs) - throws AuthorizationException, SemanticException { - try { - getAuthProvider().authorize(inputPrivs, outputPrivs); - } catch (HiveException ex) { - throw new SemanticException(ex); - } - } - - protected void authorize(Database db, Privilege priv) - throws AuthorizationException, SemanticException { - try { - getAuthProvider().authorize(db, null, new Privilege[]{priv}); - } catch (HiveException ex) { - throw new SemanticException(ex); - } - } - - protected void authorizeTable(Hive hive, String tableName, Privilege priv) - throws AuthorizationException, HiveException { - Table table; - try { - table = hive.getTable(tableName); - } catch (InvalidTableException ite) { - // Table itself doesn't exist in metastore, nothing to validate. - return; - } - - authorize(table, priv); - } - - protected void authorize(Table table, Privilege priv) - throws AuthorizationException, SemanticException { - try { - getAuthProvider().authorize(table, new Privilege[]{priv}, null); - } catch (HiveException ex) { - throw new SemanticException(ex); - } - } - - protected void authorize(Partition part, Privilege priv) - throws AuthorizationException, SemanticException { - try { - getAuthProvider().authorize(part, new Privilege[]{priv}, null); - } catch (HiveException ex) { - throw new SemanticException(ex); - } - } -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/common/ErrorType.java hcatalog/core/src/main/java/org/apache/hcatalog/common/ErrorType.java deleted file mode 100644 index 5e9acbe..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/common/ErrorType.java +++ /dev/null @@ -1,137 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.common; - -/** - * Enum type representing the various errors throws by HCat. - */ -public enum ErrorType { - - /* HCat Input Format related errors 1000 - 1999 */ - ERROR_DB_INIT (1000, "Error initializing database session"), - ERROR_EXCEED_MAXPART (1001, "Query result exceeded maximum number of partitions allowed"), - - ERROR_SET_INPUT (1002, "Error setting input information"), - - /* HCat Output Format related errors 2000 - 2999 */ - ERROR_INVALID_TABLE (2000, "Table specified does not exist"), - ERROR_SET_OUTPUT (2001, "Error setting output information"), - ERROR_DUPLICATE_PARTITION (2002, "Partition already present with given partition key values"), - ERROR_NON_EMPTY_TABLE (2003, "Non-partitioned table already contains data"), - ERROR_NOT_INITIALIZED (2004, "HCatOutputFormat not initialized, setOutput has to be called"), - ERROR_INIT_STORAGE_HANDLER (2005, "Error initializing storage handler instance"), - ERROR_PUBLISHING_PARTITION (2006, "Error adding partition to metastore"), - ERROR_SCHEMA_COLUMN_MISMATCH (2007, "Invalid column position in partition schema"), - ERROR_SCHEMA_PARTITION_KEY (2008, "Partition key cannot be present in the partition data"), - ERROR_SCHEMA_TYPE_MISMATCH (2009, "Invalid column type in partition schema"), - 
ERROR_INVALID_PARTITION_VALUES (2010, "Invalid partition values specified"), - ERROR_MISSING_PARTITION_KEY (2011, "Partition key value not provided for publish"), - ERROR_MOVE_FAILED (2012, "Moving of data failed during commit"), - ERROR_TOO_MANY_DYNAMIC_PTNS (2013, "Attempt to create too many dynamic partitions"), - ERROR_INIT_LOADER (2014, "Error initializing Pig loader"), - ERROR_INIT_STORER (2015, "Error initializing Pig storer"), - ERROR_NOT_SUPPORTED (2016, "Error operation not supported"), - - /* Authorization Errors 3000 - 3999 */ - ERROR_ACCESS_CONTROL (3000, "Permission denied"), - - /* Miscellaneous errors, range 9000 - 9998 */ - ERROR_UNIMPLEMENTED (9000, "Functionality currently unimplemented"), - ERROR_INTERNAL_EXCEPTION (9001, "Exception occurred while processing HCat request"); - - /** The error code. */ - private int errorCode; - - /** The error message. */ - private String errorMessage; - - /** Should the causal exception message be appended to the error message, yes by default*/ - private boolean appendCauseMessage = true; - - /** Is this a retriable error, no by default. */ - private boolean isRetriable = false; - - /** - * Instantiates a new error type. - * @param errorCode the error code - * @param errorMessage the error message - */ - private ErrorType(int errorCode, String errorMessage) { - this.errorCode = errorCode; - this.errorMessage = errorMessage; - } - - /** - * Instantiates a new error type. - * @param errorCode the error code - * @param errorMessage the error message - * @param appendCauseMessage should causal exception message be appended to error message - */ - private ErrorType(int errorCode, String errorMessage, boolean appendCauseMessage) { - this.errorCode = errorCode; - this.errorMessage = errorMessage; - this.appendCauseMessage = appendCauseMessage; - } - - /** - * Instantiates a new error type. 
- * @param errorCode the error code - * @param errorMessage the error message - * @param appendCauseMessage should causal exception message be appended to error message - * @param isRetriable is this a retriable error - */ - private ErrorType(int errorCode, String errorMessage, boolean appendCauseMessage, boolean isRetriable) { - this.errorCode = errorCode; - this.errorMessage = errorMessage; - this.appendCauseMessage = appendCauseMessage; - this.isRetriable = isRetriable; - } - - /** - * Gets the error code. - * @return the error code - */ - public int getErrorCode() { - return errorCode; - } - - /** - * Gets the error message. - * @return the error message - */ - public String getErrorMessage() { - return errorMessage; - } - - /** - * Checks if this is a retriable error. - * @return true, if is a retriable error, false otherwise - */ - public boolean isRetriable() { - return isRetriable; - } - - /** - * Whether the cause of the exception should be added to the error message. - * @return true, if the cause should be added to the message, false otherwise - */ - public boolean appendCauseMessage() { - return appendCauseMessage; - } -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/common/HCatConstants.java hcatalog/core/src/main/java/org/apache/hcatalog/common/HCatConstants.java deleted file mode 100644 index cde40e5..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/common/HCatConstants.java +++ /dev/null @@ -1,186 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.common; - -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.mapred.SequenceFileInputFormat; -import org.apache.hadoop.mapred.SequenceFileOutputFormat; - -public final class HCatConstants { - - public static final String HIVE_RCFILE_IF_CLASS = "org.apache.hadoop.hive.ql.io.RCFileInputFormat"; - public static final String HIVE_RCFILE_OF_CLASS = "org.apache.hadoop.hive.ql.io.RCFileOutputFormat"; - - public static final String SEQUENCEFILE_INPUT = SequenceFileInputFormat.class.getName(); - public static final String SEQUENCEFILE_OUTPUT = SequenceFileOutputFormat.class.getName(); - - public static final String HCAT_PIG_STORAGE_CLASS = "org.apache.pig.builtin.PigStorage"; - public static final String HCAT_PIG_LOADER = "hcat.pig.loader"; - public static final String HCAT_PIG_LOADER_LOCATION_SET = HCAT_PIG_LOADER + ".location.set"; - public static final String HCAT_PIG_LOADER_ARGS = "hcat.pig.loader.args"; - public static final String HCAT_PIG_STORER = "hcat.pig.storer"; - public static final String HCAT_PIG_STORER_ARGS = "hcat.pig.storer.args"; - public static final String HCAT_PIG_ARGS_DELIMIT = "hcat.pig.args.delimiter"; - public static final String HCAT_PIG_ARGS_DELIMIT_DEFAULT = ","; - public static final String HCAT_PIG_STORER_LOCATION_SET = HCAT_PIG_STORER + ".location.set"; - public static final String HCAT_PIG_INNER_TUPLE_NAME = "hcat.pig.inner.tuple.name"; - public static final String HCAT_PIG_INNER_TUPLE_NAME_DEFAULT = "innertuple"; - public static final String HCAT_PIG_INNER_FIELD_NAME = 
"hcat.pig.inner.field.name"; - public static final String HCAT_PIG_INNER_FIELD_NAME_DEFAULT = "innerfield"; - - /** - * {@value} (default: null) - * When the property is set in the UDFContext of the org.apache.hcatalog.pig.HCatStorer, HCatStorer writes - * to the location it specifies instead of the default HCatalog location format. An example can be found - * in org.apache.hcatalog.pig.HCatStorerWrapper. - */ - public static final String HCAT_PIG_STORER_EXTERNAL_LOCATION = HCAT_PIG_STORER + ".external.location"; - - //The keys used to store info into the job Configuration - public static final String HCAT_KEY_BASE = "mapreduce.lib.hcat"; - - public static final String HCAT_KEY_OUTPUT_SCHEMA = HCAT_KEY_BASE + ".output.schema"; - - public static final String HCAT_KEY_JOB_INFO = HCAT_KEY_BASE + ".job.info"; - - // hcatalog specific configurations, that can be put in hive-site.xml - public static final String HCAT_HIVE_CLIENT_EXPIRY_TIME = "hcatalog.hive.client.cache.expiry.time"; - - private HCatConstants() { // restrict instantiation - } - - public static final String HCAT_TABLE_SCHEMA = "hcat.table.schema"; - - public static final String HCAT_METASTORE_URI = HiveConf.ConfVars.METASTOREURIS.varname; - - public static final String HCAT_PERMS = "hcat.perms"; - - public static final String HCAT_GROUP = "hcat.group"; - - public static final String HCAT_CREATE_TBL_NAME = "hcat.create.tbl.name"; - - public static final String HCAT_CREATE_DB_NAME = "hcat.create.db.name"; - - public static final String HCAT_METASTORE_PRINCIPAL - = HiveConf.ConfVars.METASTORE_KERBEROS_PRINCIPAL.varname; - - /** - * The desired number of input splits produced for each partition. When the - * input files are large and few, we want to split them into many splits, - * so as to increase the parallelizm of loading the splits. Try also two - * other parameters, mapred.min.split.size and mapred.max.split.size, to - * control the number of input splits. 
- */ - public static final String HCAT_DESIRED_PARTITION_NUM_SPLITS = - "hcat.desired.partition.num.splits"; - - // IMPORTANT IMPORTANT IMPORTANT!!!!! - //The keys used to store info into the job Configuration. - //If any new keys are added, the HCatStorer needs to be updated. The HCatStorer - //updates the job configuration in the backend to insert these keys to avoid - //having to call setOutput from the backend (which would cause a metastore call - //from the map jobs) - public static final String HCAT_KEY_OUTPUT_BASE = "mapreduce.lib.hcatoutput"; - public static final String HCAT_KEY_OUTPUT_INFO = HCAT_KEY_OUTPUT_BASE + ".info"; - public static final String HCAT_KEY_HIVE_CONF = HCAT_KEY_OUTPUT_BASE + ".hive.conf"; - public static final String HCAT_KEY_TOKEN_SIGNATURE = HCAT_KEY_OUTPUT_BASE + ".token.sig"; - - public static final String[] OUTPUT_CONFS_TO_SAVE = { - HCAT_KEY_OUTPUT_INFO, - HCAT_KEY_HIVE_CONF, - HCAT_KEY_TOKEN_SIGNATURE - }; - - - public static final String HCAT_MSG_CLEAN_FREQ = "hcat.msg.clean.freq"; - public static final String HCAT_MSG_EXPIRY_DURATION = "hcat.msg.expiry.duration"; - - public static final String HCAT_MSGBUS_TOPIC_NAME = "hcat.msgbus.topic.name"; - public static final String HCAT_MSGBUS_TOPIC_NAMING_POLICY = "hcat.msgbus.topic.naming.policy"; - public static final String HCAT_MSGBUS_TOPIC_PREFIX = "hcat.msgbus.topic.prefix"; - - public static final String HCAT_DYNAMIC_PTN_JOBID = HCAT_KEY_OUTPUT_BASE + "dynamic.jobid"; - public static final boolean HCAT_IS_DYNAMIC_MAX_PTN_CHECK_ENABLED = false; - - // Message Bus related properties. 
- public static final String HCAT_DEFAULT_TOPIC_PREFIX = "hcat"; - public static final String HCAT_EVENT = "HCAT_EVENT"; - public static final String HCAT_ADD_PARTITION_EVENT = "ADD_PARTITION"; - public static final String HCAT_DROP_PARTITION_EVENT = "DROP_PARTITION"; - public static final String HCAT_PARTITION_DONE_EVENT = "PARTITION_DONE"; - public static final String HCAT_CREATE_TABLE_EVENT = "CREATE_TABLE"; - public static final String HCAT_DROP_TABLE_EVENT = "DROP_TABLE"; - public static final String HCAT_CREATE_DATABASE_EVENT = "CREATE_DATABASE"; - public static final String HCAT_DROP_DATABASE_EVENT = "DROP_DATABASE"; - public static final String HCAT_MESSAGE_VERSION = "HCAT_MESSAGE_VERSION"; - public static final String HCAT_MESSAGE_FORMAT = "HCAT_MESSAGE_FORMAT"; - public static final String CONF_LABEL_HCAT_MESSAGE_FACTORY_IMPL_PREFIX = "hcatalog.message.factory.impl."; - public static final String CONF_LABEL_HCAT_MESSAGE_FORMAT = "hcatalog.message.format"; - public static final String DEFAULT_MESSAGE_FACTORY_IMPL = "org.apache.hcatalog.messaging.json.JSONMessageFactory"; - - // System environment variables - public static final String SYSENV_HADOOP_TOKEN_FILE_LOCATION = "HADOOP_TOKEN_FILE_LOCATION"; - - // Hadoop Conf Var Names - public static final String CONF_MAPREDUCE_JOB_CREDENTIALS_BINARY = "mapreduce.job.credentials.binary"; - - //*************************************************************************** - // Data-related configuration properties. - //*************************************************************************** - - /** - * {@value} (default: {@value #HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER_DEFAULT}). - * Pig < 0.10.0 does not have boolean support, and scripts written for pre-boolean Pig versions - * will not expect boolean values when upgrading Pig. For integration the option is offered to - * convert boolean fields to integers by setting this Hadoop configuration key. 
- */ - public static final String HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER = - "hcat.data.convert.boolean.to.integer"; - public static final boolean HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER_DEFAULT = false; - - /** - * {@value} (default: {@value #HCAT_DATA_TINY_SMALL_INT_PROMOTION_DEFAULT}). - * Hive tables support tinyint and smallint columns, while not all processing frameworks support - * these types (Pig only has integer for example). Enable this property to promote tinyint and - * smallint columns to integer at runtime. Note that writes to tinyint and smallint columns - * enforce bounds checking and jobs will fail if attempting to write values outside the column - * bounds. - */ - public static final String HCAT_DATA_TINY_SMALL_INT_PROMOTION = - "hcat.data.tiny.small.int.promotion"; - public static final boolean HCAT_DATA_TINY_SMALL_INT_PROMOTION_DEFAULT = false; - - /** - * {@value} (default: {@value #HCAT_INPUT_BAD_RECORD_THRESHOLD_DEFAULT}). - * Threshold for the ratio of bad records that will be silently skipped without causing a task - * failure. This is useful when processing large data sets with corrupt records, when its - * acceptable to skip some bad records. - */ - public static final String HCAT_INPUT_BAD_RECORD_THRESHOLD_KEY = "hcat.input.bad.record.threshold"; - public static final float HCAT_INPUT_BAD_RECORD_THRESHOLD_DEFAULT = 0.0001f; - - /** - * {@value} (default: {@value #HCAT_INPUT_BAD_RECORD_MIN_DEFAULT}). - * Number of bad records that will be accepted before applying - * {@value #HCAT_INPUT_BAD_RECORD_THRESHOLD_KEY}. This is necessary to prevent an initial bad - * record from causing a task failure. 
- */ - public static final String HCAT_INPUT_BAD_RECORD_MIN_KEY = "hcat.input.bad.record.min"; - public static final int HCAT_INPUT_BAD_RECORD_MIN_DEFAULT = 2; -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/common/HCatContext.java hcatalog/core/src/main/java/org/apache/hcatalog/common/HCatContext.java deleted file mode 100644 index d3daaa1..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/common/HCatContext.java +++ /dev/null @@ -1,87 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.common; - -import com.google.common.base.Optional; -import com.google.common.base.Preconditions; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.common.classification.InterfaceAudience; -import org.apache.hadoop.hive.common.classification.InterfaceStability; - -import java.util.Map; - -/** - * HCatContext is a singleton that provides global access to configuration data. - * - *

HCatalog provides a variety of functionality that users can configure at runtime through - * configuration properties. Available configuration properties are defined in - * {@link HCatConstants}. HCatContext allows users to enable optional functionality by - * setting properties in a provided configuration.

- * - *

HCatalog users (MR apps, processing framework adapters) should set properties - * in a configuration that has been provided to - * {@link #setConf(org.apache.hadoop.conf.Configuration)} to enable optional functionality. - * The job configuration must be used to ensure properties are passed to the backend MR tasks.

- * - *

HCatalog developers should enable optional functionality by checking properties - * from {@link #getConf()}. Since users are not obligated to set a configuration, optional - * functionality must provide a sensible default.

- */ -@InterfaceAudience.Public -@InterfaceStability.Evolving -public enum HCatContext { - INSTANCE; - - private Configuration conf = null; - - /** - * Use the given configuration for optional behavior. Keys exclusive to an existing config - * are set in the new conf. The job conf must be used to ensure properties are passed to - * backend MR tasks. - */ - public synchronized HCatContext setConf(Configuration newConf) { - Preconditions.checkNotNull(newConf, "Required parameter 'newConf' must not be null."); - - if (conf == null) { - conf = newConf; - return this; - } - - if (conf != newConf) { - for (Map.Entry entry : conf) { - if ((entry.getKey().matches("hcat.*")) && (newConf.get(entry.getKey()) == null)) { - newConf.set(entry.getKey(), entry.getValue()); - } - } - conf = newConf; - } - return this; - } - - /** - * Get the configuration, if there is one. Users are not required to setup HCatContext - * unless they wish to override default behavior, so the configuration may not be present. - * - * @return an Optional that might contain a Configuration - */ - public Optional getConf() { - return Optional.fromNullable(conf); - } -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/common/HCatException.java hcatalog/core/src/main/java/org/apache/hcatalog/common/HCatException.java deleted file mode 100644 index bad05d8..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/common/HCatException.java +++ /dev/null @@ -1,159 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.common; - -import java.io.IOException; - -/** - * Class representing exceptions thrown by HCat. - */ -public class HCatException extends IOException { - - private static final long serialVersionUID = 1L; - - /** The error type enum for this exception. */ - private final ErrorType errorType; - - /** - * Instantiates a new hcat exception. - * @param errorType the error type - */ - public HCatException(ErrorType errorType) { - this(errorType, null, null); - } - - - /** - * Instantiates a new hcat exception. - * @param errorType the error type - * @param cause the cause - */ - public HCatException(ErrorType errorType, Throwable cause) { - this(errorType, null, cause); - } - - /** - * Instantiates a new hcat exception. - * @param errorType the error type - * @param extraMessage extra messages to add to the message string - */ - public HCatException(ErrorType errorType, String extraMessage) { - this(errorType, extraMessage, null); - } - - /** - * Instantiates a new hcat exception. - * @param errorType the error type - * @param extraMessage extra messages to add to the message string - * @param cause the cause - */ - public HCatException(ErrorType errorType, String extraMessage, Throwable cause) { - super(buildErrorMessage( - errorType, - extraMessage, - cause), cause); - this.errorType = errorType; - } - - - //TODO : remove default error type constructors after all exceptions - //are changed to use error types - - /** - * Instantiates a new hcat exception. 
- * @param message the error message - */ - public HCatException(String message) { - this(ErrorType.ERROR_INTERNAL_EXCEPTION, message, null); - } - - /** - * Instantiates a new hcat exception. - * @param message the error message - * @param cause the cause - */ - public HCatException(String message, Throwable cause) { - this(ErrorType.ERROR_INTERNAL_EXCEPTION, message, cause); - } - - - /** - * Builds the error message string. The error type message is appended with the extra message. If appendCause - * is true for the error type, then the message of the cause also is added to the message. - * @param type the error type - * @param extraMessage the extra message string - * @param cause the cause for the exception - * @return the exception message string - */ - public static String buildErrorMessage(ErrorType type, String extraMessage, Throwable cause) { - - //Initial message is just the error type message - StringBuffer message = new StringBuffer(HCatException.class.getName()); - message.append(" : " + type.getErrorCode()); - message.append(" : " + type.getErrorMessage()); - - if (extraMessage != null) { - //Add the extra message value to buffer - message.append(" : " + extraMessage); - } - - if (type.appendCauseMessage()) { - if (cause != null) { - //Add the cause message to buffer - message.append(". Cause : " + cause.toString()); - } - } - - return message.toString(); - } - - - /** - * Is this a retriable error. - * @return is it retriable - */ - public boolean isRetriable() { - return errorType.isRetriable(); - } - - /** - * Gets the error type. - * @return the error type enum - */ - public ErrorType getErrorType() { - return errorType; - } - - /** - * Gets the error code. 
- * @return the error code - */ - public int getErrorCode() { - return errorType.getErrorCode(); - } - - /* (non-Javadoc) - * @see java.lang.Throwable#toString() - */ - @Override - public String toString() { - return getMessage(); - } - -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/common/HCatUtil.java hcatalog/core/src/main/java/org/apache/hcatalog/common/HCatUtil.java deleted file mode 100644 index 6447b22..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/common/HCatUtil.java +++ /dev/null @@ -1,627 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.common; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.ObjectInputStream; -import java.io.ObjectOutputStream; -import java.io.Serializable; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.Properties; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.permission.FsAction; -import org.apache.hadoop.hive.common.JavaUtils; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; -import org.apache.hadoop.hive.metastore.MetaStoreUtils; -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; -import org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat; -import org.apache.hadoop.hive.ql.metadata.Partition; -import org.apache.hadoop.hive.ql.metadata.Table; -import org.apache.hadoop.hive.ql.plan.TableDesc; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; -import org.apache.hadoop.hive.thrift.DelegationTokenIdentifier; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.mapred.JobClient; -import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapreduce.JobContext; -import org.apache.hadoop.security.token.Token; -import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenIdentifier; -import org.apache.hadoop.util.ReflectionUtils; -import org.apache.hcatalog.data.Pair; -import org.apache.hcatalog.data.schema.HCatFieldSchema; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.apache.hcatalog.data.schema.HCatSchemaUtils; 
-import org.apache.hcatalog.mapreduce.FosterStorageHandler; -import org.apache.hcatalog.mapreduce.HCatOutputFormat; -import org.apache.hcatalog.mapreduce.HCatStorageHandler; -import org.apache.hcatalog.mapreduce.InputJobInfo; -import org.apache.hcatalog.mapreduce.OutputJobInfo; -import org.apache.hcatalog.mapreduce.PartInfo; -import org.apache.hcatalog.mapreduce.StorerInfo; -import org.apache.thrift.TException; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import javax.security.auth.login.LoginException; - -public class HCatUtil { - - private static final Logger LOG = LoggerFactory.getLogger(HCatUtil.class); - private static volatile HiveClientCache hiveClientCache; - private final static int DEFAULT_HIVE_CACHE_EXPIRY_TIME_SECONDS = 2 * 60; - - public static boolean checkJobContextIfRunningFromBackend(JobContext j) { - if (j.getConfiguration().get("mapred.task.id", "").equals("") && - !("true".equals(j.getConfiguration().get("pig.illustrating")))) { - return false; - } - return true; - } - - public static String serialize(Serializable obj) throws IOException { - if (obj == null) { - return ""; - } - try { - ByteArrayOutputStream serialObj = new ByteArrayOutputStream(); - ObjectOutputStream objStream = new ObjectOutputStream(serialObj); - objStream.writeObject(obj); - objStream.close(); - return encodeBytes(serialObj.toByteArray()); - } catch (Exception e) { - throw new IOException("Serialization error: " + e.getMessage(), e); - } - } - - public static Object deserialize(String str) throws IOException { - if (str == null || str.length() == 0) { - return null; - } - try { - ByteArrayInputStream serialObj = new ByteArrayInputStream( - decodeBytes(str)); - ObjectInputStream objStream = new ObjectInputStream(serialObj); - return objStream.readObject(); - } catch (Exception e) { - throw new IOException("Deserialization error: " + e.getMessage(), e); - } - } - - public static String encodeBytes(byte[] bytes) { - StringBuffer strBuf = new StringBuffer(); - 
- for (int i = 0; i < bytes.length; i++) { - strBuf.append((char) (((bytes[i] >> 4) & 0xF) + ('a'))); - strBuf.append((char) (((bytes[i]) & 0xF) + ('a'))); - } - - return strBuf.toString(); - } - - public static byte[] decodeBytes(String str) { - byte[] bytes = new byte[str.length() / 2]; - for (int i = 0; i < str.length(); i += 2) { - char c = str.charAt(i); - bytes[i / 2] = (byte) ((c - 'a') << 4); - c = str.charAt(i + 1); - bytes[i / 2] += (c - 'a'); - } - return bytes; - } - - public static List getHCatFieldSchemaList( - FieldSchema... fields) throws HCatException { - List result = new ArrayList( - fields.length); - - for (FieldSchema f : fields) { - result.add(HCatSchemaUtils.getHCatFieldSchema(f)); - } - - return result; - } - - public static List getHCatFieldSchemaList( - List fields) throws HCatException { - if (fields == null) { - return null; - } else { - List result = new ArrayList(); - for (FieldSchema f : fields) { - result.add(HCatSchemaUtils.getHCatFieldSchema(f)); - } - return result; - } - } - - public static HCatSchema extractSchema(Table table) throws HCatException { - return new HCatSchema(HCatUtil.getHCatFieldSchemaList(table.getCols())); - } - - public static HCatSchema extractSchema(Partition partition) throws HCatException { - return new HCatSchema(HCatUtil.getHCatFieldSchemaList(partition.getCols())); - } - - public static List getFieldSchemaList( - List hcatFields) { - if (hcatFields == null) { - return null; - } else { - List result = new ArrayList(); - for (HCatFieldSchema f : hcatFields) { - result.add(HCatSchemaUtils.getFieldSchema(f)); - } - return result; - } - } - - public static Table getTable(HiveMetaStoreClient client, String dbName, String tableName) - throws NoSuchObjectException, TException, MetaException { - return new Table(client.getTable(dbName, tableName)); - } - - public static HCatSchema getTableSchemaWithPtnCols(Table table) throws IOException { - HCatSchema tableSchema = new 
HCatSchema(HCatUtil.getHCatFieldSchemaList(table.getCols())); - - if (table.getPartitionKeys().size() != 0) { - - // add partition keys to table schema - // NOTE : this assumes that we do not ever have ptn keys as columns - // inside the table schema as well! - for (FieldSchema fs : table.getPartitionKeys()) { - tableSchema.append(HCatSchemaUtils.getHCatFieldSchema(fs)); - } - } - return tableSchema; - } - - /** - * return the partition columns from a table instance - * - * @param table the instance to extract partition columns from - * @return HCatSchema instance which contains the partition columns - * @throws IOException - */ - public static HCatSchema getPartitionColumns(Table table) throws IOException { - HCatSchema cols = new HCatSchema(new LinkedList()); - if (table.getPartitionKeys().size() != 0) { - for (FieldSchema fs : table.getPartitionKeys()) { - cols.append(HCatSchemaUtils.getHCatFieldSchema(fs)); - } - } - return cols; - } - - /** - * Validate partition schema, checks if the column types match between the - * partition and the existing table schema. Returns the list of columns - * present in the partition but not in the table. - * - * @param table the table - * @param partitionSchema the partition schema - * @return the list of newly added fields - * @throws IOException Signals that an I/O exception has occurred. 
- */ - public static List validatePartitionSchema(Table table, - HCatSchema partitionSchema) throws IOException { - Map partitionKeyMap = new HashMap(); - - for (FieldSchema field : table.getPartitionKeys()) { - partitionKeyMap.put(field.getName().toLowerCase(), field); - } - - List tableCols = table.getCols(); - List newFields = new ArrayList(); - - for (int i = 0; i < partitionSchema.getFields().size(); i++) { - - FieldSchema field = HCatSchemaUtils.getFieldSchema(partitionSchema - .getFields().get(i)); - - FieldSchema tableField; - if (i < tableCols.size()) { - tableField = tableCols.get(i); - - if (!tableField.getName().equalsIgnoreCase(field.getName())) { - throw new HCatException( - ErrorType.ERROR_SCHEMA_COLUMN_MISMATCH, - "Expected column <" + tableField.getName() - + "> at position " + (i + 1) - + ", found column <" + field.getName() - + ">"); - } - } else { - tableField = partitionKeyMap.get(field.getName().toLowerCase()); - - if (tableField != null) { - throw new HCatException( - ErrorType.ERROR_SCHEMA_PARTITION_KEY, "Key <" - + field.getName() + ">"); - } - } - - if (tableField == null) { - // field present in partition but not in table - newFields.add(field); - } else { - // field present in both. validate type has not changed - TypeInfo partitionType = TypeInfoUtils - .getTypeInfoFromTypeString(field.getType()); - TypeInfo tableType = TypeInfoUtils - .getTypeInfoFromTypeString(tableField.getType()); - - if (!partitionType.equals(tableType)) { - throw new HCatException( - ErrorType.ERROR_SCHEMA_TYPE_MISMATCH, "Column <" - + field.getName() + ">, expected <" - + tableType.getTypeName() + ">, got <" - + partitionType.getTypeName() + ">"); - } - } - } - - return newFields; - } - - /** - * Test if the first FsAction is more permissive than the second. This is - * useful in cases where we want to ensure that a file owner has more - * permissions than the group they belong to, for eg. 
More completely(but - * potentially more cryptically) owner-r >= group-r >= world-r : bitwise - * and-masked with 0444 => 444 >= 440 >= 400 >= 000 owner-w >= group-w >= - * world-w : bitwise and-masked with &0222 => 222 >= 220 >= 200 >= 000 - * owner-x >= group-x >= world-x : bitwise and-masked with &0111 => 111 >= - * 110 >= 100 >= 000 - * - * @return true if first FsAction is more permissive than the second, false - * if not. - */ - public static boolean validateMorePermissive(FsAction first, FsAction second) { - if ((first == FsAction.ALL) || (second == FsAction.NONE) - || (first == second)) { - return true; - } - switch (first) { - case READ_EXECUTE: - return ((second == FsAction.READ) || (second == FsAction.EXECUTE)); - case READ_WRITE: - return ((second == FsAction.READ) || (second == FsAction.WRITE)); - case WRITE_EXECUTE: - return ((second == FsAction.WRITE) || (second == FsAction.EXECUTE)); - } - return false; - } - - /** - * Ensure that read or write permissions are not granted without also - * granting execute permissions. 
Essentially, r-- , rw- and -w- are invalid, - * r-x, -wx, rwx, ---, --x are valid - * - * @param perms The FsAction to verify - * @return true if the presence of read or write permission is accompanied - * by execute permissions - */ - public static boolean validateExecuteBitPresentIfReadOrWrite(FsAction perms) { - if ((perms == FsAction.READ) || (perms == FsAction.WRITE) - || (perms == FsAction.READ_WRITE)) { - return false; - } - return true; - } - - public static Token getJobTrackerDelegationToken( - Configuration conf, String userName) throws Exception { - // LOG.info("getJobTrackerDelegationToken("+conf+","+userName+")"); - JobClient jcl = new JobClient(new JobConf(conf, HCatOutputFormat.class)); - Token t = jcl - .getDelegationToken(new Text(userName)); - // LOG.info("got "+t); - return t; - - // return null; - } - - public static Token extractThriftToken( - String tokenStrForm, String tokenSignature) throws MetaException, - TException, IOException { - // LOG.info("extractThriftToken("+tokenStrForm+","+tokenSignature+")"); - Token t = new Token(); - t.decodeFromUrlString(tokenStrForm); - t.setService(new Text(tokenSignature)); - // LOG.info("returning "+t); - return t; - } - - /** - * Create an instance of a storage handler defined in storerInfo. If one cannot be found - * then FosterStorageHandler is used to encapsulate the InputFormat, OutputFormat and SerDe. - * This StorageHandler assumes the other supplied storage artifacts are for a file-based storage system. 
- * @param conf job's configuration will be used to configure the Configurable StorageHandler - * @param storerInfo StorerInfo to definining the StorageHandler and InputFormat, OutputFormat and SerDe - * @return storageHandler instance - * @throws IOException - */ - public static HCatStorageHandler getStorageHandler(Configuration conf, StorerInfo storerInfo) throws IOException { - return getStorageHandler(conf, - storerInfo.getStorageHandlerClass(), - storerInfo.getSerdeClass(), - storerInfo.getIfClass(), - storerInfo.getOfClass()); - } - - public static HCatStorageHandler getStorageHandler(Configuration conf, PartInfo partitionInfo) throws IOException { - return HCatUtil.getStorageHandler( - conf, - partitionInfo.getStorageHandlerClassName(), - partitionInfo.getSerdeClassName(), - partitionInfo.getInputFormatClassName(), - partitionInfo.getOutputFormatClassName()); - } - - /** - * Create an instance of a storage handler. If storageHandler == null, - * then surrrogate StorageHandler is used to encapsulate the InputFormat, OutputFormat and SerDe. - * This StorageHandler assumes the other supplied storage artifacts are for a file-based storage system. 
- * @param conf job's configuration will be used to configure the Configurable StorageHandler - * @param storageHandler fully qualified class name of the desired StorageHandle instance - * @param serDe fully qualified class name of the desired SerDe instance - * @param inputFormat fully qualified class name of the desired InputFormat instance - * @param outputFormat fully qualified class name of the desired outputFormat instance - * @return storageHandler instance - * @throws IOException - */ - public static HCatStorageHandler getStorageHandler(Configuration conf, - String storageHandler, - String serDe, - String inputFormat, - String outputFormat) - throws IOException { - - if ((storageHandler == null) || (storageHandler.equals(FosterStorageHandler.class.getName()))) { - try { - FosterStorageHandler fosterStorageHandler = - new FosterStorageHandler(inputFormat, outputFormat, serDe); - fosterStorageHandler.setConf(conf); - return fosterStorageHandler; - } catch (ClassNotFoundException e) { - throw new IOException("Failed to load " - + "foster storage handler", e); - } - } - - try { - Class handlerClass = - (Class) Class - .forName(storageHandler, true, JavaUtils.getClassLoader()); - return (HCatStorageHandler) ReflectionUtils.newInstance( - handlerClass, conf); - } catch (ClassNotFoundException e) { - throw new IOException("Error in loading storage handler." - + e.getMessage(), e); - } - } - - public static Pair getDbAndTableName(String tableName) throws IOException { - String[] dbTableNametokens = tableName.split("\\."); - if (dbTableNametokens.length == 1) { - return new Pair(MetaStoreUtils.DEFAULT_DATABASE_NAME, tableName); - } else if (dbTableNametokens.length == 2) { - return new Pair(dbTableNametokens[0], dbTableNametokens[1]); - } else { - throw new IOException("tableName expected in the form " - + ". or
. Got " + tableName); - } - } - - public static Map - getInputJobProperties(HCatStorageHandler storageHandler, - InputJobInfo inputJobInfo) { - TableDesc tableDesc = new TableDesc(storageHandler.getSerDeClass(), - storageHandler.getInputFormatClass(), - storageHandler.getOutputFormatClass(), - inputJobInfo.getTableInfo().getStorerInfo().getProperties()); - if (tableDesc.getJobProperties() == null) { - tableDesc.setJobProperties(new HashMap()); - } - - Map jobProperties = new HashMap(); - try { - tableDesc.getJobProperties().put( - HCatConstants.HCAT_KEY_JOB_INFO, - HCatUtil.serialize(inputJobInfo)); - - storageHandler.configureInputJobProperties(tableDesc, - jobProperties); - - } catch (IOException e) { - throw new IllegalStateException( - "Failed to configure StorageHandler", e); - } - - return jobProperties; - } - - @InterfaceAudience.Private - @InterfaceStability.Evolving - public static void - configureOutputStorageHandler(HCatStorageHandler storageHandler, - Configuration conf, - OutputJobInfo outputJobInfo) { - //TODO replace IgnoreKeyTextOutputFormat with a - //HiveOutputFormatWrapper in StorageHandler - TableDesc tableDesc = new TableDesc(storageHandler.getSerDeClass(), - storageHandler.getInputFormatClass(), - IgnoreKeyTextOutputFormat.class, - outputJobInfo.getTableInfo().getStorerInfo().getProperties()); - if (tableDesc.getJobProperties() == null) - tableDesc.setJobProperties(new HashMap()); - for (Map.Entry el : conf) { - tableDesc.getJobProperties().put(el.getKey(), el.getValue()); - } - - Map jobProperties = new HashMap(); - try { - tableDesc.getJobProperties().put( - HCatConstants.HCAT_KEY_OUTPUT_INFO, - HCatUtil.serialize(outputJobInfo)); - - storageHandler.configureOutputJobProperties(tableDesc, - jobProperties); - - for (Map.Entry el : jobProperties.entrySet()) { - conf.set(el.getKey(), el.getValue()); - } - } catch (IOException e) { - throw new IllegalStateException( - "Failed to configure StorageHandler", e); - } - } - - /** - * Replace the 
contents of dest with the contents of src - * @param src - * @param dest - */ - public static void copyConf(Configuration src, Configuration dest) { - dest.clear(); - for (Map.Entry el : src) { - dest.set(el.getKey(), el.getValue()); - } - } - - /** - * Get or create a hive client depending on whether it exits in cache or not - * @param hiveConf The hive configuration - * @return the client - * @throws MetaException When HiveMetaStoreClient couldn't be created - * @throws IOException - */ - public static HiveMetaStoreClient getHiveClient(HiveConf hiveConf) - throws MetaException, IOException { - - // Singleton behaviour: create the cache instance if required. The cache needs to be created lazily and - // using the expiry time available in hiveConf. - - if (hiveClientCache == null) { - synchronized (HiveMetaStoreClient.class) { - if (hiveClientCache == null) { - hiveClientCache = new HiveClientCache(hiveConf.getInt(HCatConstants.HCAT_HIVE_CLIENT_EXPIRY_TIME, - DEFAULT_HIVE_CACHE_EXPIRY_TIME_SECONDS)); - } - } - } - try { - return hiveClientCache.get(hiveConf); - } catch (LoginException e) { - throw new IOException("Couldn't create hiveMetaStoreClient, Error getting UGI for user", e); - } - } - - public static void closeHiveClientQuietly(HiveMetaStoreClient client) { - try { - if (client != null) - client.close(); - } catch (Exception e) { - LOG.debug("Error closing metastore client. 
Ignored the error.", e); - } - } - - public static HiveConf getHiveConf(Configuration conf) - throws IOException { - - HiveConf hiveConf = new HiveConf(conf, HCatUtil.class); - - //copy the hive conf into the job conf and restore it - //in the backend context - if (conf.get(HCatConstants.HCAT_KEY_HIVE_CONF) == null) { - conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, - HCatUtil.serialize(hiveConf.getAllProperties())); - } else { - //Copy configuration properties into the hive conf - Properties properties = (Properties) HCatUtil.deserialize( - conf.get(HCatConstants.HCAT_KEY_HIVE_CONF)); - - for (Map.Entry prop : properties.entrySet()) { - if (prop.getValue() instanceof String) { - hiveConf.set((String) prop.getKey(), (String) prop.getValue()); - } else if (prop.getValue() instanceof Integer) { - hiveConf.setInt((String) prop.getKey(), - (Integer) prop.getValue()); - } else if (prop.getValue() instanceof Boolean) { - hiveConf.setBoolean((String) prop.getKey(), - (Boolean) prop.getValue()); - } else if (prop.getValue() instanceof Long) { - hiveConf.setLong((String) prop.getKey(), (Long) prop.getValue()); - } else if (prop.getValue() instanceof Float) { - hiveConf.setFloat((String) prop.getKey(), - (Float) prop.getValue()); - } - } - } - - if (conf.get(HCatConstants.HCAT_KEY_TOKEN_SIGNATURE) != null) { - hiveConf.set("hive.metastore.token.signature", - conf.get(HCatConstants.HCAT_KEY_TOKEN_SIGNATURE)); - } - - return hiveConf; - } - - - public static JobConf getJobConfFromContext(JobContext jobContext) { - JobConf jobConf; - // we need to convert the jobContext into a jobConf - // 0.18 jobConf (Hive) vs 0.20+ jobContext (HCat) - // begin conversion.. 
- jobConf = new JobConf(jobContext.getConfiguration()); - // ..end of conversion - - - return jobConf; - } - - public static void copyJobPropertiesToJobConf( - Map jobProperties, JobConf jobConf) { - for (Map.Entry entry : jobProperties.entrySet()) { - jobConf.set(entry.getKey(), entry.getValue()); - } - } - - - public static boolean isHadoop23() { - String version = org.apache.hadoop.util.VersionInfo.getVersion(); - if (version.matches("\\b0\\.23\\..+\\b")||version.matches("\\b2\\..*")) - return true; - return false; - } -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/common/HiveClientCache.java hcatalog/core/src/main/java/org/apache/hcatalog/common/HiveClientCache.java deleted file mode 100644 index 93c1863..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/common/HiveClientCache.java +++ /dev/null @@ -1,337 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.hcatalog.common; - -import com.google.common.cache.Cache; -import com.google.common.cache.CacheBuilder; -import com.google.common.cache.RemovalListener; -import com.google.common.cache.RemovalNotification; -import org.apache.commons.lang.builder.EqualsBuilder; -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; -import org.apache.hadoop.hive.shims.ShimLoader; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.thrift.TException; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import javax.security.auth.login.LoginException; -import java.io.IOException; -import java.util.concurrent.Callable; -import java.util.concurrent.ConcurrentMap; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicInteger; - -/** - * A thread safe time expired cache for HiveMetaStoreClient - */ -class HiveClientCache { - final private Cache hiveCache; - private static final Logger LOG = LoggerFactory.getLogger(HiveClientCache.class); - private final int timeout; - // This lock is used to make sure removalListener won't close a client that is being contemplated for returning by get() - private final Object CACHE_TEARDOWN_LOCK = new Object(); - - private static final AtomicInteger nextId = new AtomicInteger(0); - - // Since HiveMetaStoreClient is not threadsafe, hive clients are not shared across threads. - // Thread local variable containing each thread's unique ID, is used as one of the keys for the cache - // causing each thread to get a different client even if the hiveConf is same. 
- private static final ThreadLocal threadId = - new ThreadLocal() { - @Override - protected Integer initialValue() { - return nextId.getAndIncrement(); - } - }; - - private int getThreadId() { - return threadId.get(); - } - - /** - * @param timeout the length of time in seconds after a client is created that it should be automatically removed - */ - public HiveClientCache(final int timeout) { - this.timeout = timeout; - RemovalListener removalListener = - new RemovalListener() { - public void onRemoval(RemovalNotification notification) { - CacheableHiveMetaStoreClient hiveMetaStoreClient = notification.getValue(); - if (hiveMetaStoreClient != null) { - synchronized (CACHE_TEARDOWN_LOCK) { - hiveMetaStoreClient.setExpiredFromCache(); - hiveMetaStoreClient.tearDownIfUnused(); - } - } - } - }; - hiveCache = CacheBuilder.newBuilder() - .expireAfterWrite(timeout, TimeUnit.SECONDS) - .removalListener(removalListener) - .build(); - - // Add a shutdown hook for cleanup, if there are elements remaining in the cache which were not cleaned up. - // This is the best effort approach. Ignore any error while doing so. Notice that most of the clients - // would get cleaned up via either the removalListener or the close() call, only the active clients - // that are in the cache or expired but being used in other threads wont get cleaned. The following code will only - // clean the active cache ones. The ones expired from cache but being hold by other threads are in the mercy - // of finalize() being called. - Thread cleanupHiveClientShutdownThread = new Thread() { - @Override - public void run() { - LOG.debug("Cleaning up hive client cache in ShutDown hook"); - closeAllClientsQuietly(); - } - }; - Runtime.getRuntime().addShutdownHook(cleanupHiveClientShutdownThread); - } - - /** - * Note: This doesn't check if they are being used or not, meant only to be called during shutdown etc. 
- */ - void closeAllClientsQuietly() { - try { - ConcurrentMap elements = hiveCache.asMap(); - for (CacheableHiveMetaStoreClient cacheableHiveMetaStoreClient : elements.values()) { - cacheableHiveMetaStoreClient.tearDown(); - } - } catch (Exception e) { - LOG.warn("Clean up of hive clients in the cache failed. Ignored", e); - } - } - - public void cleanup() { - hiveCache.cleanUp(); - } - - /** - * Returns a cached client if exists or else creates one, caches and returns it. It also checks that the client is - * healthy and can be reused - * @param hiveConf - * @return the hive client - * @throws MetaException - * @throws IOException - * @throws LoginException - */ - public HiveMetaStoreClient get(final HiveConf hiveConf) throws MetaException, IOException, LoginException { - final HiveClientCacheKey cacheKey = HiveClientCacheKey.fromHiveConf(hiveConf, getThreadId()); - CacheableHiveMetaStoreClient hiveMetaStoreClient = null; - // the hmsc is not shared across threads. So the only way it could get closed while we are doing healthcheck - // is if removalListener closes it. 
The synchronization takes care that removalListener won't do it - synchronized (CACHE_TEARDOWN_LOCK) { - hiveMetaStoreClient = getOrCreate(cacheKey); - hiveMetaStoreClient.acquire(); - } - if (!hiveMetaStoreClient.isOpen()) { - synchronized (CACHE_TEARDOWN_LOCK) { - hiveCache.invalidate(cacheKey); - hiveMetaStoreClient.close(); - hiveMetaStoreClient = getOrCreate(cacheKey); - hiveMetaStoreClient.acquire(); - } - } - return hiveMetaStoreClient; - } - - /** - * Return from cache if exists else create/cache and return - * @param cacheKey - * @return - * @throws IOException - * @throws MetaException - * @throws LoginException - */ - private CacheableHiveMetaStoreClient getOrCreate(final HiveClientCacheKey cacheKey) throws IOException, MetaException, LoginException { - try { - return hiveCache.get(cacheKey, new Callable() { - @Override - public CacheableHiveMetaStoreClient call() throws MetaException { - return new CacheableHiveMetaStoreClient(cacheKey.getHiveConf(), timeout); - } - }); - } catch (ExecutionException e) { - Throwable t = e.getCause(); - if (t instanceof IOException) { - throw (IOException) t; - } else if (t instanceof MetaException) { - throw (MetaException) t; - } else if (t instanceof LoginException) { - throw (LoginException) t; - } else { - throw new IOException("Error creating hiveMetaStoreClient", t); - } - } - } - - /** - * A class to wrap HiveConf and expose equality based only on UserGroupInformation and the metaStoreURIs. - * This becomes the key for the cache and this way the same HiveMetaStoreClient would be returned if - * UserGroupInformation and metaStoreURIs are same. 
This function can evolve to express - * the cases when HiveConf is different but the same hiveMetaStoreClient can be used - */ - public static class HiveClientCacheKey { - final private String metaStoreURIs; - final private UserGroupInformation ugi; - final private HiveConf hiveConf; - final private int threadId; - - private HiveClientCacheKey(HiveConf hiveConf, final int threadId) throws IOException, LoginException { - this.metaStoreURIs = hiveConf.getVar(HiveConf.ConfVars.METASTOREURIS); - ugi = ShimLoader.getHadoopShims().getUGIForConf(hiveConf); - this.hiveConf = hiveConf; - this.threadId = threadId; - } - - public static HiveClientCacheKey fromHiveConf(HiveConf hiveConf, final int threadId) throws IOException, LoginException { - return new HiveClientCacheKey(hiveConf, threadId); - } - - public HiveConf getHiveConf() { - return hiveConf; - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - HiveClientCacheKey that = (HiveClientCacheKey) o; - return new EqualsBuilder(). - append(this.metaStoreURIs, - that.metaStoreURIs). - append(this.ugi, that.ugi). - append(this.threadId, that.threadId).isEquals(); - } - - @Override - public int hashCode() { - return new HashCodeBuilder(). - append(metaStoreURIs). - append(ugi). - append(threadId).toHashCode(); - } - } - - /** - * Add # of current users on HiveMetaStoreClient, so that the client can be cleaned when no one is using it. 
- */ - public static class CacheableHiveMetaStoreClient extends HiveMetaStoreClient { - private AtomicInteger users = new AtomicInteger(0); - private volatile boolean expiredFromCache = false; - private boolean isClosed = false; - private final long expiryTime; - private static final int EXPIRY_TIME_EXTENSION_IN_MILLIS = 60 * 1000; - - public CacheableHiveMetaStoreClient(final HiveConf conf, final int timeout) throws MetaException { - super(conf); - // Extend the expiry time with some extra time on top of guava expiry time to make sure - // that items closed() are for sure expired and would never be returned by guava. - this.expiryTime = System.currentTimeMillis() + timeout * 1000 + EXPIRY_TIME_EXTENSION_IN_MILLIS; - } - - private void acquire() { - users.incrementAndGet(); - } - - private void release() { - users.decrementAndGet(); - } - - public void setExpiredFromCache() { - expiredFromCache = true; - } - - public boolean isClosed() { - return isClosed; - } - - /** - * Make a call to hive meta store and see if the client is still usable. Some calls where the user provides - * invalid data renders the client unusable for future use (example: create a table with very long table name) - * @return - */ - protected boolean isOpen() { - try { - // Look for an unlikely database name and see if either MetaException or TException is thrown - this.getDatabase("NonExistentDatabaseUsedForHealthCheck"); - } catch (NoSuchObjectException e) { - return true; // It is okay if the database doesn't exist - } catch (MetaException e) { - return false; - } catch (TException e) { - return false; - } - return true; - } - - /** - * Decrement the user count and piggyback this to set expiry flag as well, then teardown(), if conditions are met. - * This *MUST* be called by anyone who uses this client. - */ - @Override - public void close() { - release(); - if (System.currentTimeMillis() >= expiryTime) - setExpiredFromCache(); - tearDownIfUnused(); - } - - /** - * Tear down only if - * 1. 
There are no active user - * 2. It has expired from the cache - */ - private void tearDownIfUnused() { - if (users.get() == 0 && expiredFromCache) { - this.tearDown(); - } - } - - /** - * Close if not closed already - */ - protected synchronized void tearDown() { - try { - if (!isClosed) { - super.close(); - } - isClosed = true; - } catch (Exception e) { - LOG.warn("Error closing hive metastore client. Ignored.", e); - } - } - - /** - * Last effort to clean up, may not even get called. - * @throws Throwable - */ - @Override - protected void finalize() throws Throwable { - try { - this.tearDown(); - } finally { - super.finalize(); - } - } - } -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/data/DataType.java hcatalog/core/src/main/java/org/apache/hcatalog/data/DataType.java deleted file mode 100644 index 8d51904..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/data/DataType.java +++ /dev/null @@ -1,207 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.data; - -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.TreeMap; -import java.util.Map.Entry; - - -public abstract class DataType { - - public static final byte NULL = 1; - public static final byte BOOLEAN = 5; - public static final byte BYTE = 6; - public static final byte INTEGER = 10; - public static final byte SHORT = 11; - public static final byte LONG = 15; - public static final byte FLOAT = 20; - public static final byte DOUBLE = 25; - public static final byte STRING = 55; - public static final byte BINARY = 60; - - public static final byte MAP = 100; - public static final byte STRUCT = 110; - public static final byte LIST = 120; - public static final byte ERROR = -1; - - /** - * Determine the datatype of an object. - * @param o Object to test. - * @return byte code of the type, or ERROR if we don't know. - */ - public static byte findType(Object o) { - if (o == null) { - return NULL; - } - - Class clazz = o.getClass(); - - // Try to put the most common first - if (clazz == String.class) { - return STRING; - } else if (clazz == Integer.class) { - return INTEGER; - } else if (clazz == Long.class) { - return LONG; - } else if (clazz == Float.class) { - return FLOAT; - } else if (clazz == Double.class) { - return DOUBLE; - } else if (clazz == Boolean.class) { - return BOOLEAN; - } else if (clazz == Byte.class) { - return BYTE; - } else if (clazz == Short.class) { - return SHORT; - } else if (o instanceof List) { - return LIST; - } else if (o instanceof Map) { - return MAP; - } else if (o instanceof byte[]) { - return BINARY; - } else { - return ERROR; - } - } - - public static int compare(Object o1, Object o2) { - - return compare(o1, o2, findType(o1), findType(o2)); - } - - public static int compare(Object o1, Object o2, byte dt1, byte dt2) { - if (dt1 == dt2) { - switch (dt1) { - case NULL: - return 0; - - case BOOLEAN: - return ((Boolean) o1).compareTo((Boolean) o2); - - case 
BYTE: - return ((Byte) o1).compareTo((Byte) o2); - - case INTEGER: - return ((Integer) o1).compareTo((Integer) o2); - - case LONG: - return ((Long) o1).compareTo((Long) o2); - - case FLOAT: - return ((Float) o1).compareTo((Float) o2); - - case DOUBLE: - return ((Double) o1).compareTo((Double) o2); - - case STRING: - return ((String) o1).compareTo((String) o2); - - case SHORT: - return ((Short) o1).compareTo((Short) o2); - - case BINARY: - return compareByteArray((byte[]) o1, (byte[]) o2); - - case LIST: - List l1 = (List) o1; - List l2 = (List) o2; - int len = l1.size(); - if (len != l2.size()) { - return len - l2.size(); - } else { - for (int i = 0; i < len; i++) { - int cmpVal = compare(l1.get(i), l2.get(i)); - if (cmpVal != 0) { - return cmpVal; - } - } - return 0; - } - - case MAP: { - Map m1 = (Map) o1; - Map m2 = (Map) o2; - int sz1 = m1.size(); - int sz2 = m2.size(); - if (sz1 < sz2) { - return -1; - } else if (sz1 > sz2) { - return 1; - } else { - // This is bad, but we have to sort the keys of the maps in order - // to be commutative. - TreeMap tm1 = new TreeMap(m1); - TreeMap tm2 = new TreeMap(m2); - Iterator> i1 = tm1.entrySet().iterator(); - Iterator> i2 = tm2.entrySet().iterator(); - while (i1.hasNext()) { - Map.Entry entry1 = i1.next(); - Map.Entry entry2 = i2.next(); - int c = compare(entry1.getValue(), entry2.getValue()); - if (c != 0) { - return c; - } else { - c = compare(entry1.getValue(), entry2.getValue()); - if (c != 0) { - return c; - } - } - } - return 0; - } - } - - default: - throw new RuntimeException("Unkown type " + dt1 + - " in compare"); - } - } else { - return dt1 < dt2 ? 
-1 : 1; - } - } - - private static int compareByteArray(byte[] o1, byte[] o2) { - - for (int i = 0; i < o1.length; i++) { - if (i == o2.length) { - return 1; - } - if (o1[i] == o2[i]) { - continue; - } - if (o1[i] > o1[i]) { - return 1; - } else { - return -1; - } - } - - //bytes in o1 are same as o2 - //in case o2 was longer - if (o2.length > o1.length) { - return -1; - } - return 0; //equals - } - -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/data/DefaultHCatRecord.java hcatalog/core/src/main/java/org/apache/hcatalog/data/DefaultHCatRecord.java deleted file mode 100644 index 6f85dd9..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/data/DefaultHCatRecord.java +++ /dev/null @@ -1,131 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.data; - -import java.io.DataInput; -import java.io.DataOutput; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - -import org.apache.hcatalog.common.HCatException; -import org.apache.hcatalog.data.schema.HCatSchema; - -public class DefaultHCatRecord extends HCatRecord { - - private List contents; - - public DefaultHCatRecord() { - contents = new ArrayList(); - } - - public DefaultHCatRecord(int size) { - contents = new ArrayList(size); - for (int i = 0; i < size; i++) { - contents.add(null); - } - } - - @Override - public void remove(int idx) throws HCatException { - contents.remove(idx); - } - - public DefaultHCatRecord(List list) { - contents = list; - } - - @Override - public Object get(int fieldNum) { - return contents.get(fieldNum); - } - - @Override - public List getAll() { - return contents; - } - - @Override - public void set(int fieldNum, Object val) { - contents.set(fieldNum, val); - } - - @Override - public int size() { - return contents.size(); - } - - @Override - public void readFields(DataInput in) throws IOException { - - contents.clear(); - int len = in.readInt(); - for (int i = 0; i < len; i++) { - contents.add(ReaderWriter.readDatum(in)); - } - } - - @Override - public void write(DataOutput out) throws IOException { - int sz = size(); - out.writeInt(sz); - for (int i = 0; i < sz; i++) { - ReaderWriter.writeDatum(out, contents.get(i)); - } - - } - - @Override - public int hashCode() { - int hash = 1; - for (Object o : contents) { - if (o != null) { - hash = 31 * hash + o.hashCode(); - } - } - return hash; - } - - @Override - public String toString() { - - StringBuilder sb = new StringBuilder(); - for (Object o : contents) { - sb.append(o + "\t"); - } - return sb.toString(); - } - - @Override - public Object get(String fieldName, HCatSchema recordSchema) throws HCatException { - return get(recordSchema.getPosition(fieldName)); - } - - @Override - public void set(String fieldName, 
HCatSchema recordSchema, Object value) throws HCatException { - set(recordSchema.getPosition(fieldName), value); - } - - @Override - public void copy(HCatRecord r) throws HCatException { - this.contents = r.getAll(); - } - -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/data/HCatRecord.java hcatalog/core/src/main/java/org/apache/hcatalog/data/HCatRecord.java deleted file mode 100644 index b529e52..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/data/HCatRecord.java +++ /dev/null @@ -1,149 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.data; - -import java.util.List; -import java.util.Map; - -import org.apache.hcatalog.common.HCatException; -import org.apache.hcatalog.data.schema.HCatSchema; - -/** - * Abstract class exposing get and set semantics for basic record usage. - * Note : - * HCatRecord is designed only to be used as in-memory representation only. - * Don't use it to store data on the physical device. 
- */ -public abstract class HCatRecord implements HCatRecordable { - - public abstract Object get(String fieldName, HCatSchema recordSchema) throws HCatException; - - public abstract void set(String fieldName, HCatSchema recordSchema, Object value) throws HCatException; - - public abstract void remove(int idx) throws HCatException; - - public abstract void copy(HCatRecord r) throws HCatException; - - protected Object get(String fieldName, HCatSchema recordSchema, Class clazz) throws HCatException { - // TODO : if needed, verify that recordschema entry for fieldname matches appropriate type. - return get(fieldName, recordSchema); - } - - public Boolean getBoolean(String fieldName, HCatSchema recordSchema) throws HCatException { - return (Boolean) get(fieldName, recordSchema, Boolean.class); - } - - public void setBoolean(String fieldName, HCatSchema recordSchema, Boolean value) throws HCatException { - set(fieldName, recordSchema, value); - } - - public byte[] getByteArray(String fieldName, HCatSchema recordSchema) throws HCatException { - return (byte[]) get(fieldName, recordSchema, byte[].class); - } - - public void setByteArray(String fieldName, HCatSchema recordSchema, byte[] value) throws HCatException { - set(fieldName, recordSchema, value); - } - - public Byte getByte(String fieldName, HCatSchema recordSchema) throws HCatException { - //TINYINT - return (Byte) get(fieldName, recordSchema, Byte.class); - } - - public void setByte(String fieldName, HCatSchema recordSchema, Byte value) throws HCatException { - set(fieldName, recordSchema, value); - } - - public Short getShort(String fieldName, HCatSchema recordSchema) throws HCatException { - // SMALLINT - return (Short) get(fieldName, recordSchema, Short.class); - } - - public void setShort(String fieldName, HCatSchema recordSchema, Short value) throws HCatException { - set(fieldName, recordSchema, value); - } - - public Integer getInteger(String fieldName, HCatSchema recordSchema) throws HCatException { - 
return (Integer) get(fieldName, recordSchema, Integer.class); - } - - public void setInteger(String fieldName, HCatSchema recordSchema, Integer value) throws HCatException { - set(fieldName, recordSchema, value); - } - - public Long getLong(String fieldName, HCatSchema recordSchema) throws HCatException { - // BIGINT - return (Long) get(fieldName, recordSchema, Long.class); - } - - public void setLong(String fieldName, HCatSchema recordSchema, Long value) throws HCatException { - set(fieldName, recordSchema, value); - } - - public Float getFloat(String fieldName, HCatSchema recordSchema) throws HCatException { - return (Float) get(fieldName, recordSchema, Float.class); - } - - public void setFloat(String fieldName, HCatSchema recordSchema, Float value) throws HCatException { - set(fieldName, recordSchema, value); - } - - public Double getDouble(String fieldName, HCatSchema recordSchema) throws HCatException { - return (Double) get(fieldName, recordSchema, Double.class); - } - - public void setDouble(String fieldName, HCatSchema recordSchema, Double value) throws HCatException { - set(fieldName, recordSchema, value); - } - - public String getString(String fieldName, HCatSchema recordSchema) throws HCatException { - return (String) get(fieldName, recordSchema, String.class); - } - - public void setString(String fieldName, HCatSchema recordSchema, String value) throws HCatException { - set(fieldName, recordSchema, value); - } - - @SuppressWarnings("unchecked") - public List getStruct(String fieldName, HCatSchema recordSchema) throws HCatException { - return (List) get(fieldName, recordSchema, List.class); - } - - public void setStruct(String fieldName, HCatSchema recordSchema, List value) throws HCatException { - set(fieldName, recordSchema, value); - } - - public List getList(String fieldName, HCatSchema recordSchema) throws HCatException { - return (List) get(fieldName, recordSchema, List.class); - } - - public void setList(String fieldName, HCatSchema recordSchema, 
List value) throws HCatException { - set(fieldName, recordSchema, value); - } - - public Map getMap(String fieldName, HCatSchema recordSchema) throws HCatException { - return (Map) get(fieldName, recordSchema, Map.class); - } - - public void setMap(String fieldName, HCatSchema recordSchema, Map value) throws HCatException { - set(fieldName, recordSchema, value); - } - -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/data/HCatRecordObjectInspector.java hcatalog/core/src/main/java/org/apache/hcatalog/data/HCatRecordObjectInspector.java deleted file mode 100644 index 20a2219..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/data/HCatRecordObjectInspector.java +++ /dev/null @@ -1,53 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.hcatalog.data; - -import java.util.List; - -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.StructField; - -public class HCatRecordObjectInspector extends StandardStructObjectInspector { - - protected HCatRecordObjectInspector(List structFieldNames, - List structFieldObjectInspectors) { - super(structFieldNames, structFieldObjectInspectors); - } - - @Override - public Object getStructFieldData(Object data, StructField fieldRef) { - if (data == null) { - return new IllegalArgumentException("Data passed in to get field from was null!"); - } - - int fieldID = ((MyField) fieldRef).getFieldID(); - if (!(fieldID >= 0 && fieldID < fields.size())) { - throw new IllegalArgumentException("Invalid field index [" + fieldID + "]"); - } - - return ((HCatRecord) data).get(fieldID); - } - - @Override - public List getStructFieldsDataAsList(Object o) { - return ((HCatRecord) o).getAll(); - } - -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/data/HCatRecordObjectInspectorFactory.java hcatalog/core/src/main/java/org/apache/hcatalog/data/HCatRecordObjectInspectorFactory.java deleted file mode 100644 index e15d257..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/data/HCatRecordObjectInspectorFactory.java +++ /dev/null @@ -1,132 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.data; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; - -import org.apache.hadoop.hive.serde2.SerDeException; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * ObjectInspectorFactory for HCatRecordObjectInspectors (and associated helper inspectors) - */ -public class HCatRecordObjectInspectorFactory { - - private final static Logger LOG = LoggerFactory.getLogger(HCatRecordObjectInspectorFactory.class); - - static HashMap cachedHCatRecordObjectInspectors = - new HashMap(); - static HashMap cachedObjectInspectors = - new HashMap(); - - /** - * Returns HCatRecordObjectInspector given a StructTypeInfo type definition for the record to look into - * @param typeInfo Type definition for the record to look into - * @return appropriate HCatRecordObjectInspector - * @throws SerDeException - */ - public static HCatRecordObjectInspector getHCatRecordObjectInspector( - StructTypeInfo typeInfo) throws SerDeException { - 
HCatRecordObjectInspector oi = cachedHCatRecordObjectInspectors.get(typeInfo); - if (oi == null) { - - LOG.debug("Got asked for OI for {} [{} ]", typeInfo.getCategory(), typeInfo.getTypeName()); - switch (typeInfo.getCategory()) { - case STRUCT: - StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo; - List fieldNames = structTypeInfo.getAllStructFieldNames(); - List fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos(); - List fieldObjectInspectors = new ArrayList(fieldTypeInfos.size()); - for (int i = 0; i < fieldTypeInfos.size(); i++) { - fieldObjectInspectors.add(getStandardObjectInspectorFromTypeInfo(fieldTypeInfos.get(i))); - } - oi = new HCatRecordObjectInspector(fieldNames, fieldObjectInspectors); - - break; - default: - // Hmm.. not good, - // the only type expected here is STRUCT, which maps to HCatRecord - // - anything else is an error. Return null as the inspector. - throw new SerDeException("TypeInfo [" + typeInfo.getTypeName() - + "] was not of struct type - HCatRecord expected struct type, got [" - + typeInfo.getCategory().toString() + "]"); - } - cachedHCatRecordObjectInspectors.put(typeInfo, oi); - } - return oi; - } - - public static ObjectInspector getStandardObjectInspectorFromTypeInfo(TypeInfo typeInfo) { - - - ObjectInspector oi = cachedObjectInspectors.get(typeInfo); - if (oi == null) { - - LOG.debug("Got asked for OI for {}, [{}]", typeInfo.getCategory(), typeInfo.getTypeName()); - switch (typeInfo.getCategory()) { - case PRIMITIVE: - oi = PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector( - ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory()); - break; - case STRUCT: - StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo; - List fieldNames = structTypeInfo.getAllStructFieldNames(); - List fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos(); - List fieldObjectInspectors = - new ArrayList(fieldTypeInfos.size()); - for (int i = 0; i < fieldTypeInfos.size(); i++) { - 
fieldObjectInspectors.add(getStandardObjectInspectorFromTypeInfo(fieldTypeInfos.get(i))); - } - oi = ObjectInspectorFactory.getStandardStructObjectInspector( - fieldNames, fieldObjectInspectors - ); - break; - case LIST: - ObjectInspector elementObjectInspector = getStandardObjectInspectorFromTypeInfo( - ((ListTypeInfo) typeInfo).getListElementTypeInfo()); - oi = ObjectInspectorFactory.getStandardListObjectInspector(elementObjectInspector); - break; - case MAP: - ObjectInspector keyObjectInspector = getStandardObjectInspectorFromTypeInfo( - ((MapTypeInfo) typeInfo).getMapKeyTypeInfo()); - ObjectInspector valueObjectInspector = getStandardObjectInspectorFromTypeInfo( - ((MapTypeInfo) typeInfo).getMapValueTypeInfo()); - oi = ObjectInspectorFactory.getStandardMapObjectInspector(keyObjectInspector, valueObjectInspector); - break; - default: - oi = null; - } - cachedObjectInspectors.put(typeInfo, oi); - } - return oi; - } - - -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/data/HCatRecordSerDe.java hcatalog/core/src/main/java/org/apache/hcatalog/data/HCatRecordSerDe.java deleted file mode 100644 index 9ad94f6..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/data/HCatRecordSerDe.java +++ /dev/null @@ -1,318 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.data; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Map; -import java.util.Properties; -import java.util.TreeMap; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.serde.serdeConstants; -import org.apache.hadoop.hive.serde2.SerDe; -import org.apache.hadoop.hive.serde2.SerDeException; -import org.apache.hadoop.hive.serde2.SerDeStats; -import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.StructField; -import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; -import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; -import org.apache.hadoop.io.Writable; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.common.HCatContext; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * SerDe class for serializing to and from HCatRecord - */ -public class HCatRecordSerDe implements SerDe { - - private static final Logger LOG = LoggerFactory.getLogger(HCatRecordSerDe.class); - - public HCatRecordSerDe() throws SerDeException { - } - - private List columnNames; - private List columnTypes; - private StructTypeInfo rowTypeInfo; - - private HCatRecordObjectInspector cachedObjectInspector; - - 
@Override - public void initialize(Configuration conf, Properties tbl) - throws SerDeException { - - LOG.debug("Initializing HCatRecordSerDe"); - LOG.debug("props to serde: {}", tbl.entrySet()); - - // Get column names and types - String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS); - String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES); - - // all table column names - if (columnNameProperty.length() == 0) { - columnNames = new ArrayList(); - } else { - columnNames = Arrays.asList(columnNameProperty.split(",")); - } - - // all column types - if (columnTypeProperty.length() == 0) { - columnTypes = new ArrayList(); - } else { - columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty); - } - - - LOG.debug("columns: {} {}", columnNameProperty, columnNames); - LOG.debug("types: {} {}", columnTypeProperty, columnTypes); - assert (columnNames.size() == columnTypes.size()); - - rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes); - cachedObjectInspector = HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector(rowTypeInfo); - } - - public void initialize(HCatSchema hsch) throws SerDeException { - - LOG.debug("Initializing HCatRecordSerDe through HCatSchema {}.", hsch); - - rowTypeInfo = (StructTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(hsch.getSchemaAsTypeString()); - cachedObjectInspector = HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector(rowTypeInfo); - - } - - - /** - * The purpose of a deserialize method is to turn a data blob - * which is a writable representation of the data into an - * object that can then be parsed using the appropriate - * ObjectInspector. In this case, since HCatRecord is directly - * already the Writable object, there's no extra work to be done - * here. Most of the logic resides in the ObjectInspector to be - * able to return values from within the HCatRecord to hive when - * it wants it. 
- */ - @Override - public Object deserialize(Writable data) throws SerDeException { - if (!(data instanceof HCatRecord)) { - throw new SerDeException(getClass().getName() + ": expects HCatRecord!"); - } - - return (HCatRecord) data; - } - - /** - * The purpose of the serialize method is to turn an object-representation - * with a provided ObjectInspector into a Writable format, which - * the underlying layer can then use to write out. - * - * In this case, it means that Hive will call this method to convert - * an object with appropriate objectinspectors that it knows about, - * to write out a HCatRecord. - */ - @Override - public Writable serialize(Object obj, ObjectInspector objInspector) - throws SerDeException { - if (objInspector.getCategory() != Category.STRUCT) { - throw new SerDeException(getClass().toString() - + " can only serialize struct types, but we got: " - + objInspector.getTypeName()); - } - return new DefaultHCatRecord((List) serializeStruct(obj, (StructObjectInspector) objInspector)); - } - - - /** - * Return serialized HCatRecord from an underlying - * object-representation, and readable by an ObjectInspector - * @param obj : Underlying object-representation - * @param soi : StructObjectInspector - * @return HCatRecord - */ - private static List serializeStruct(Object obj, StructObjectInspector soi) - throws SerDeException { - - List fields = soi.getAllStructFieldRefs(); - List list = soi.getStructFieldsDataAsList(obj); - - if (list == null) { - return null; - } - - List l = new ArrayList(fields.size()); - - if (fields != null) { - for (int i = 0; i < fields.size(); i++) { - - // Get the field objectInspector and the field object. - ObjectInspector foi = fields.get(i).getFieldObjectInspector(); - Object f = list.get(i); - Object res = serializeField(f, foi); - l.add(i, res); - } - } - return l; - } - - /** - * Return underlying Java Object from an object-representation - * that is readable by a provided ObjectInspector. 
- */ - public static Object serializeField(Object field, ObjectInspector fieldObjectInspector) - throws SerDeException { - - Object res; - if (fieldObjectInspector.getCategory() == Category.PRIMITIVE) { - res = serializePrimitiveField(field, fieldObjectInspector); - } else if (fieldObjectInspector.getCategory() == Category.STRUCT) { - res = serializeStruct(field, (StructObjectInspector) fieldObjectInspector); - } else if (fieldObjectInspector.getCategory() == Category.LIST) { - res = serializeList(field, (ListObjectInspector) fieldObjectInspector); - } else if (fieldObjectInspector.getCategory() == Category.MAP) { - res = serializeMap(field, (MapObjectInspector) fieldObjectInspector); - } else { - throw new SerDeException(HCatRecordSerDe.class.toString() - + " does not know what to do with fields of unknown category: " - + fieldObjectInspector.getCategory() + " , type: " + fieldObjectInspector.getTypeName()); - } - return res; - } - - /** - * Helper method to return underlying Java Map from - * an object-representation that is readable by a provided - * MapObjectInspector - */ - private static Map serializeMap(Object f, MapObjectInspector moi) throws SerDeException { - ObjectInspector koi = moi.getMapKeyObjectInspector(); - ObjectInspector voi = moi.getMapValueObjectInspector(); - Map m = new TreeMap(); - - Map readMap = moi.getMap(f); - if (readMap == null) { - return null; - } else { - for (Map.Entry entry : readMap.entrySet()) { - m.put(serializeField(entry.getKey(), koi), serializeField(entry.getValue(), voi)); - } - } - return m; - } - - private static List serializeList(Object f, ListObjectInspector loi) throws SerDeException { - List l = loi.getList(f); - if (l == null) { - return null; - } - - ObjectInspector eloi = loi.getListElementObjectInspector(); - if (eloi.getCategory() == Category.PRIMITIVE) { - List list = new ArrayList(l.size()); - for (int i = 0; i < l.size(); i++) { - list.add(((PrimitiveObjectInspector) eloi).getPrimitiveJavaObject(l.get(i))); 
- } - return list; - } else if (eloi.getCategory() == Category.STRUCT) { - List> list = new ArrayList>(l.size()); - for (int i = 0; i < l.size(); i++) { - list.add(serializeStruct(l.get(i), (StructObjectInspector) eloi)); - } - return list; - } else if (eloi.getCategory() == Category.LIST) { - List> list = new ArrayList>(l.size()); - for (int i = 0; i < l.size(); i++) { - list.add(serializeList(l.get(i), (ListObjectInspector) eloi)); - } - return list; - } else if (eloi.getCategory() == Category.MAP) { - List> list = new ArrayList>(l.size()); - for (int i = 0; i < l.size(); i++) { - list.add(serializeMap(l.get(i), (MapObjectInspector) eloi)); - } - return list; - } else { - throw new SerDeException(HCatRecordSerDe.class.toString() - + " does not know what to do with fields of unknown category: " - + eloi.getCategory() + " , type: " + eloi.getTypeName()); - } - } - - private static Object serializePrimitiveField(Object field, - ObjectInspector fieldObjectInspector) { - - Object f = ((PrimitiveObjectInspector) fieldObjectInspector).getPrimitiveJavaObject(field); - if (f != null && HCatContext.INSTANCE.getConf().isPresent()) { - Configuration conf = HCatContext.INSTANCE.getConf().get(); - - if (f instanceof Boolean && - conf.getBoolean( - HCatConstants.HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER, - HCatConstants.HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER_DEFAULT)) { - return ((Boolean) f) ? 1 : 0; - } else if (f instanceof Short && - conf.getBoolean( - HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION, - HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION_DEFAULT)) { - return new Integer((Short) f); - } else if (f instanceof Byte && - conf.getBoolean( - HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION, - HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION_DEFAULT)) { - return new Integer((Byte) f); - } - } - - return f; - } - - /** - * Return an object inspector that can read through the object - * that we return from deserialize(). 
To wit, that means we need - * to return an ObjectInspector that can read HCatRecord, given - * the type info for it during initialize(). This also means - * that this method cannot and should not be called before initialize() - */ - @Override - public ObjectInspector getObjectInspector() throws SerDeException { - return (ObjectInspector) cachedObjectInspector; - } - - @Override - public Class getSerializedClass() { - return HCatRecord.class; - } - - @Override - public SerDeStats getSerDeStats() { - // no support for statistics yet - return null; - } - - -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/data/HCatRecordable.java hcatalog/core/src/main/java/org/apache/hcatalog/data/HCatRecordable.java deleted file mode 100644 index 4f1ca38..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/data/HCatRecordable.java +++ /dev/null @@ -1,56 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.data; - -import java.util.List; - -import org.apache.hadoop.io.Writable; - -/** - * Interface that determines whether we can implement a HCatRecord on top of it - */ -public interface HCatRecordable extends Writable { - - /** - * Gets the field at the specified index. 
- * @param fieldNum the field number - * @return the object at the specified index - */ - Object get(int fieldNum); - - /** - * Gets all the fields of the hcat record. - * @return the list of fields - */ - List getAll(); - - /** - * Sets the field at the specified index. - * @param fieldNum the field number - * @param value the value to set - */ - void set(int fieldNum, Object value); - - /** - * Gets the size of the hcat record. - * @return the size - */ - int size(); - -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/data/JsonSerDe.java hcatalog/core/src/main/java/org/apache/hcatalog/data/JsonSerDe.java deleted file mode 100644 index 02f02cf..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/data/JsonSerDe.java +++ /dev/null @@ -1,575 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.hcatalog.data; - -import java.io.ByteArrayInputStream; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.Properties; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.serde.serdeConstants; -import org.apache.hadoop.hive.serde2.SerDe; -import org.apache.hadoop.hive.serde2.SerDeException; -import org.apache.hadoop.hive.serde2.SerDeStats; -import org.apache.hadoop.hive.serde2.SerDeUtils; -import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.StructField; -import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector; -import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector; -import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.io.Writable; -import org.apache.hcatalog.common.HCatException; -import org.apache.hcatalog.data.schema.HCatFieldSchema; -import org.apache.hcatalog.data.schema.HCatFieldSchema.Type; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.apache.hcatalog.data.schema.HCatSchemaUtils; -import org.codehaus.jackson.JsonFactory; -import org.codehaus.jackson.JsonParseException; -import org.codehaus.jackson.JsonParser; -import org.codehaus.jackson.JsonToken; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class JsonSerDe implements SerDe { - - private static final Logger LOG = LoggerFactory.getLogger(JsonSerDe.class); - private List columnNames; - private List columnTypes; - - private StructTypeInfo rowTypeInfo; - private HCatSchema schema; - - private JsonFactory jsonFactory = null; - - private HCatRecordObjectInspector cachedObjectInspector; - - @Override - public void initialize(Configuration conf, Properties tbl) - throws SerDeException { - - - LOG.debug("Initializing JsonSerDe"); - LOG.debug("props to serde: {}", tbl.entrySet()); - - - // Get column names and types - String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS); - String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES); - - // all table column names - if (columnNameProperty.length() == 0) { - columnNames = new ArrayList(); - } else { - columnNames = Arrays.asList(columnNameProperty.split(",")); - } - - // all column types - if (columnTypeProperty.length() == 0) { - columnTypes = new ArrayList(); - } else { - columnTypes = 
TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty); - } - - LOG.debug("columns: {}, {}", columnNameProperty, columnNames); - LOG.debug("types: {}, {} ", columnTypeProperty, columnTypes); - - assert (columnNames.size() == columnTypes.size()); - - rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes); - - cachedObjectInspector = HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector(rowTypeInfo); - try { - schema = HCatSchemaUtils.getHCatSchema(rowTypeInfo).get(0).getStructSubSchema(); - LOG.debug("schema : {}", schema); - LOG.debug("fields : {}", schema.getFieldNames()); - } catch (HCatException e) { - throw new SerDeException(e); - } - - jsonFactory = new JsonFactory(); - } - - /** - * Takes JSON string in Text form, and has to return an object representation above - * it that's readable by the corresponding object inspector. - * - * For this implementation, since we're using the jackson parser, we can construct - * our own object implementation, and we use HCatRecord for it - */ - @Override - public Object deserialize(Writable blob) throws SerDeException { - - Text t = (Text) blob; - JsonParser p; - List r = new ArrayList(Collections.nCopies(columnNames.size(), null)); - try { - p = jsonFactory.createJsonParser(new ByteArrayInputStream((t.getBytes()))); - if (p.nextToken() != JsonToken.START_OBJECT) { - throw new IOException("Start token not found where expected"); - } - JsonToken token; - while (((token = p.nextToken()) != JsonToken.END_OBJECT) && (token != null)) { - // iterate through each token, and create appropriate object here. 
- populateRecord(r, token, p, schema); - } - } catch (JsonParseException e) { - LOG.warn("Error [{}] parsing json text [{}].", e, t); - LOG.debug(null, e); - throw new SerDeException(e); - } catch (IOException e) { - LOG.warn("Error [{}] parsing json text [{}].", e, t); - LOG.debug(null, e); - throw new SerDeException(e); - } - - return new DefaultHCatRecord(r); - } - - private void populateRecord(List r, JsonToken token, JsonParser p, HCatSchema s) throws IOException { - if (token != JsonToken.FIELD_NAME) { - throw new IOException("Field name expected"); - } - String fieldName = p.getText(); - int fpos; - try { - fpos = s.getPosition(fieldName); - } catch (NullPointerException npe) { - fpos = getPositionFromHiveInternalColumnName(fieldName); - LOG.debug("NPE finding position for field [{}] in schema [{}]", fieldName, s); - if (!fieldName.equalsIgnoreCase(getHiveInternalColumnName(fpos))) { - LOG.error("Hive internal column name {} and position " - + "encoding {} for the column name are at odds", fieldName, fpos); - throw npe; - } - if (fpos == -1) { - return; // unknown field, we return. - } - } - HCatFieldSchema hcatFieldSchema = s.getFields().get(fpos); - Object currField = extractCurrentField(p, null, hcatFieldSchema, false); - r.set(fpos, currField); - } - - public String getHiveInternalColumnName(int fpos) { - return HiveConf.getColumnInternalName(fpos); - } - - public int getPositionFromHiveInternalColumnName(String internalName) { -// return HiveConf.getPositionFromInternalName(fieldName); - // The above line should have been all the implementation that - // we need, but due to a bug in that impl which recognizes - // only single-digit columns, we need another impl here. 
- Pattern internalPattern = Pattern.compile("_col([0-9]+)"); - Matcher m = internalPattern.matcher(internalName); - if (!m.matches()) { - return -1; - } else { - return Integer.parseInt(m.group(1)); - } - } - - /** - * Utility method to extract current expected field from given JsonParser - * - * To get the field, we need either a type or a hcatFieldSchema(necessary for complex types) - * It is possible that one of them can be null, and so, if so, the other is instantiated - * from the other - * - * isTokenCurrent is a boolean variable also passed in, which determines - * if the JsonParser is already at the token we expect to read next, or - * needs advancing to the next before we read. - */ - private Object extractCurrentField(JsonParser p, Type t, - HCatFieldSchema hcatFieldSchema, boolean isTokenCurrent) throws IOException, JsonParseException, - HCatException { - Object val = null; - JsonToken valueToken; - if (isTokenCurrent) { - valueToken = p.getCurrentToken(); - } else { - valueToken = p.nextToken(); - } - - if (hcatFieldSchema != null) { - t = hcatFieldSchema.getType(); - } - switch (t) { - case INT: - val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getIntValue(); - break; - case TINYINT: - val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getByteValue(); - break; - case SMALLINT: - val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getShortValue(); - break; - case BIGINT: - val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getLongValue(); - break; - case BOOLEAN: - String bval = (valueToken == JsonToken.VALUE_NULL) ? null : p.getText(); - if (bval != null) { - val = Boolean.valueOf(bval); - } else { - val = null; - } - break; - case FLOAT: - val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getFloatValue(); - break; - case DOUBLE: - val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getDoubleValue(); - break; - case STRING: - val = (valueToken == JsonToken.VALUE_NULL) ? 
null : p.getText(); - break; - case BINARY: - throw new IOException("JsonSerDe does not support BINARY type"); - case ARRAY: - if (valueToken == JsonToken.VALUE_NULL) { - val = null; - break; - } - if (valueToken != JsonToken.START_ARRAY) { - throw new IOException("Start of Array expected"); - } - List arr = new ArrayList(); - while ((valueToken = p.nextToken()) != JsonToken.END_ARRAY) { - arr.add(extractCurrentField(p, null, hcatFieldSchema.getArrayElementSchema().get(0), true)); - } - val = arr; - break; - case MAP: - if (valueToken == JsonToken.VALUE_NULL) { - val = null; - break; - } - if (valueToken != JsonToken.START_OBJECT) { - throw new IOException("Start of Object expected"); - } - Map map = new LinkedHashMap(); - Type keyType = hcatFieldSchema.getMapKeyType(); - HCatFieldSchema valueSchema = hcatFieldSchema.getMapValueSchema().get(0); - while ((valueToken = p.nextToken()) != JsonToken.END_OBJECT) { - Object k = getObjectOfCorrespondingPrimitiveType(p.getCurrentName(), keyType); - Object v; - if (valueSchema.getType() == HCatFieldSchema.Type.STRUCT) { - v = extractCurrentField(p, null, valueSchema, false); - } else { - v = extractCurrentField(p, null, valueSchema, true); - } - - map.put(k, v); - } - val = map; - break; - case STRUCT: - if (valueToken == JsonToken.VALUE_NULL) { - val = null; - break; - } - if (valueToken != JsonToken.START_OBJECT) { - throw new IOException("Start of Object expected"); - } - HCatSchema subSchema = hcatFieldSchema.getStructSubSchema(); - int sz = subSchema.getFieldNames().size(); - - List struct = new ArrayList(Collections.nCopies(sz, null)); - while ((valueToken = p.nextToken()) != JsonToken.END_OBJECT) { - populateRecord(struct, valueToken, p, subSchema); - } - val = struct; - break; - } - return val; - } - - private Object getObjectOfCorrespondingPrimitiveType(String s, Type t) throws IOException { - switch (t) { - case INT: - return Integer.valueOf(s); - case TINYINT: - return Byte.valueOf(s); - case SMALLINT: - return 
Short.valueOf(s); - case BIGINT: - return Long.valueOf(s); - case BOOLEAN: - return (s.equalsIgnoreCase("true")); - case FLOAT: - return Float.valueOf(s); - case DOUBLE: - return Double.valueOf(s); - case STRING: - return s; - case BINARY: - throw new IOException("JsonSerDe does not support BINARY type"); - } - throw new IOException("Could not convert from string to map type " + t); - } - - /** - * Given an object and object inspector pair, traverse the object - * and generate a Text representation of the object. - */ - @Override - public Writable serialize(Object obj, ObjectInspector objInspector) - throws SerDeException { - StringBuilder sb = new StringBuilder(); - try { - - StructObjectInspector soi = (StructObjectInspector) objInspector; - List structFields = soi.getAllStructFieldRefs(); - assert (columnNames.size() == structFields.size()); - if (obj == null) { - sb.append("null"); - } else { - sb.append(SerDeUtils.LBRACE); - for (int i = 0; i < structFields.size(); i++) { - if (i > 0) { - sb.append(SerDeUtils.COMMA); - } - sb.append(SerDeUtils.QUOTE); - sb.append(columnNames.get(i)); - sb.append(SerDeUtils.QUOTE); - sb.append(SerDeUtils.COLON); - buildJSONString(sb, soi.getStructFieldData(obj, structFields.get(i)), - structFields.get(i).getFieldObjectInspector()); - } - sb.append(SerDeUtils.RBRACE); - } - - } catch (IOException e) { - LOG.warn("Error generating json text from object.", e); - throw new SerDeException(e); - } - return new Text(sb.toString()); - } - - // TODO : code section copied over from SerDeUtils because of non-standard json production there - // should use quotes for all field names. We should fix this there, and then remove this copy. - // See http://jackson.codehaus.org/1.7.3/javadoc/org/codehaus/jackson/JsonParser.Feature.html#ALLOW_UNQUOTED_FIELD_NAMES - // for details - trying to enable Jackson to ignore that doesn't seem to work(compilation failure - // when attempting to use that feature, so having to change the production itself. 
- // Also, throws IOException when Binary is detected. - private static void buildJSONString(StringBuilder sb, Object o, ObjectInspector oi) throws IOException { - - switch (oi.getCategory()) { - case PRIMITIVE: { - PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; - if (o == null) { - sb.append("null"); - } else { - switch (poi.getPrimitiveCategory()) { - case BOOLEAN: { - boolean b = ((BooleanObjectInspector) poi).get(o); - sb.append(b ? "true" : "false"); - break; - } - case BYTE: { - sb.append(((ByteObjectInspector) poi).get(o)); - break; - } - case SHORT: { - sb.append(((ShortObjectInspector) poi).get(o)); - break; - } - case INT: { - sb.append(((IntObjectInspector) poi).get(o)); - break; - } - case LONG: { - sb.append(((LongObjectInspector) poi).get(o)); - break; - } - case FLOAT: { - sb.append(((FloatObjectInspector) poi).get(o)); - break; - } - case DOUBLE: { - sb.append(((DoubleObjectInspector) poi).get(o)); - break; - } - case STRING: { - sb.append('"'); - sb.append(SerDeUtils.escapeString(((StringObjectInspector) poi) - .getPrimitiveJavaObject(o))); - sb.append('"'); - break; - } - case TIMESTAMP: { - sb.append('"'); - sb.append(((TimestampObjectInspector) poi) - .getPrimitiveWritableObject(o)); - sb.append('"'); - break; - } - case BINARY: { - throw new IOException("JsonSerDe does not support BINARY type"); - } - default: - throw new RuntimeException("Unknown primitive type: " - + poi.getPrimitiveCategory()); - } - } - break; - } - case LIST: { - ListObjectInspector loi = (ListObjectInspector) oi; - ObjectInspector listElementObjectInspector = loi - .getListElementObjectInspector(); - List olist = loi.getList(o); - if (olist == null) { - sb.append("null"); - } else { - sb.append(SerDeUtils.LBRACKET); - for (int i = 0; i < olist.size(); i++) { - if (i > 0) { - sb.append(SerDeUtils.COMMA); - } - buildJSONString(sb, olist.get(i), listElementObjectInspector); - } - sb.append(SerDeUtils.RBRACKET); - } - break; - } - case MAP: { - 
MapObjectInspector moi = (MapObjectInspector) oi; - ObjectInspector mapKeyObjectInspector = moi.getMapKeyObjectInspector(); - ObjectInspector mapValueObjectInspector = moi - .getMapValueObjectInspector(); - Map omap = moi.getMap(o); - if (omap == null) { - sb.append("null"); - } else { - sb.append(SerDeUtils.LBRACE); - boolean first = true; - for (Object entry : omap.entrySet()) { - if (first) { - first = false; - } else { - sb.append(SerDeUtils.COMMA); - } - Map.Entry e = (Map.Entry) entry; - StringBuilder keyBuilder = new StringBuilder(); - buildJSONString(keyBuilder, e.getKey(), mapKeyObjectInspector); - String keyString = keyBuilder.toString().trim(); - boolean doQuoting = (!keyString.isEmpty()) && (keyString.charAt(0) != SerDeUtils.QUOTE); - if (doQuoting) { - sb.append(SerDeUtils.QUOTE); - } - sb.append(keyString); - if (doQuoting) { - sb.append(SerDeUtils.QUOTE); - } - sb.append(SerDeUtils.COLON); - buildJSONString(sb, e.getValue(), mapValueObjectInspector); - } - sb.append(SerDeUtils.RBRACE); - } - break; - } - case STRUCT: { - StructObjectInspector soi = (StructObjectInspector) oi; - List structFields = soi.getAllStructFieldRefs(); - if (o == null) { - sb.append("null"); - } else { - sb.append(SerDeUtils.LBRACE); - for (int i = 0; i < structFields.size(); i++) { - if (i > 0) { - sb.append(SerDeUtils.COMMA); - } - sb.append(SerDeUtils.QUOTE); - sb.append(structFields.get(i).getFieldName()); - sb.append(SerDeUtils.QUOTE); - sb.append(SerDeUtils.COLON); - buildJSONString(sb, soi.getStructFieldData(o, structFields.get(i)), - structFields.get(i).getFieldObjectInspector()); - } - sb.append(SerDeUtils.RBRACE); - } - break; - } - case UNION: { - UnionObjectInspector uoi = (UnionObjectInspector) oi; - if (o == null) { - sb.append("null"); - } else { - sb.append(SerDeUtils.LBRACE); - sb.append(uoi.getTag(o)); - sb.append(SerDeUtils.COLON); - buildJSONString(sb, uoi.getField(o), - uoi.getObjectInspectors().get(uoi.getTag(o))); - sb.append(SerDeUtils.RBRACE); - } - 
break; - } - default: - throw new RuntimeException("Unknown type in ObjectInspector!"); - } - } - - - /** - * Returns an object inspector for the specified schema that - * is capable of reading in the object representation of the JSON string - */ - @Override - public ObjectInspector getObjectInspector() throws SerDeException { - return cachedObjectInspector; - } - - @Override - public Class getSerializedClass() { - return Text.class; - } - - @Override - public SerDeStats getSerDeStats() { - // no support for statistics yet - return null; - } - -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/data/LazyHCatRecord.java hcatalog/core/src/main/java/org/apache/hcatalog/data/LazyHCatRecord.java deleted file mode 100644 index ad2d83a..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/data/LazyHCatRecord.java +++ /dev/null @@ -1,146 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.data; - -import java.io.DataInput; -import java.io.DataOutput; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - -import org.apache.hadoop.hive.serde2.SerDeException; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.StructField; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; -import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; -import org.apache.hcatalog.common.HCatException; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * An implementation of HCatRecord that wraps an Object returned by a SerDe - * and an ObjectInspector. This delays deserialization of unused columns. - */ -public class LazyHCatRecord extends HCatRecord { - - public static final Logger LOG = LoggerFactory.getLogger(LazyHCatRecord.class.getName()); - - private Object wrappedObject; - private StructObjectInspector soi; - - @Override - public Object get(int fieldNum) { - try { - StructField fref = soi.getAllStructFieldRefs().get(fieldNum); - return HCatRecordSerDe.serializeField( - soi.getStructFieldData(wrappedObject, fref), - fref.getFieldObjectInspector()); - } catch (SerDeException e) { - throw new IllegalStateException("SerDe Exception deserializing",e); - } - } - - @Override - public List getAll() { - List r = new ArrayList(this.size()); - for (int i = 0; i < this.size(); i++){ - r.add(i, get(i)); - } - return r; - } - - @Override - public void set(int fieldNum, Object value) { - throw new UnsupportedOperationException("not allowed to run set() on LazyHCatRecord"); - } - - @Override - public int size() { - return soi.getAllStructFieldRefs().size(); - } - - @Override - public void readFields(DataInput in) throws IOException { - throw new UnsupportedOperationException("LazyHCatRecord is intended to wrap" - + " an 
object/object inspector as a HCatRecord " - + "- it does not need to be read from DataInput."); - } - - @Override - public void write(DataOutput out) throws IOException { - throw new UnsupportedOperationException("LazyHCatRecord is intended to wrap" - + " an object/object inspector as a HCatRecord " - + "- it does not need to be written to a DataOutput."); - } - - @Override - public Object get(String fieldName, HCatSchema recordSchema) throws HCatException { - int idx = recordSchema.getPosition(fieldName); - return get(idx); - } - - @Override - public void set(String fieldName, HCatSchema recordSchema, Object value) throws HCatException { - throw new UnsupportedOperationException("not allowed to run set() on LazyHCatRecord"); - } - - @Override - public void remove(int idx) throws HCatException { - throw new UnsupportedOperationException("not allowed to run remove() on LazyHCatRecord"); - } - - @Override - public void copy(HCatRecord r) throws HCatException { - throw new UnsupportedOperationException("not allowed to run copy() on LazyHCatRecord"); - } - - public LazyHCatRecord(Object wrappedObject, ObjectInspector oi) throws Exception { - if (oi.getCategory() != Category.STRUCT) { - throw new SerDeException(getClass().toString() + - " can only make a lazy hcat record from " + - "objects of struct types, but we got: " + oi.getTypeName()); - } - - this.soi = (StructObjectInspector)oi; - this.wrappedObject = wrappedObject; - } - - @Override - public String toString(){ - StringBuilder sb = new StringBuilder(); - for(int i = 0; i< size() ; i++) { - sb.append(get(i)+"\t"); - } - return sb.toString(); - } - - /** - * Convert this LazyHCatRecord to a DefaultHCatRecord. This is required - * before you can write out a record via write. 
- * @return an HCatRecord that can be serialized - * @throws HCatException - */ - public HCatRecord getWritable() throws HCatException { - DefaultHCatRecord d = new DefaultHCatRecord(); - d.copy(this); - return d; - } -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/data/Pair.java hcatalog/core/src/main/java/org/apache/hcatalog/data/Pair.java deleted file mode 100644 index f45a579..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/data/Pair.java +++ /dev/null @@ -1,89 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.data; - -import java.io.Serializable; - -/** - * Copy of C++ STL pair container. - */ -public class Pair implements Serializable { - - private static final long serialVersionUID = 1L; - public T first; - public U second; - - /** - * @param f First element in pair. - * @param s Second element in pair. - */ - public Pair(T f, U s) { - first = f; - second = s; - } - - /* (non-Javadoc) - * @see java.lang.Object#toString() - */ - @Override - public String toString() { - return "[" + first.toString() + "," + second.toString() + "]"; - } - - @Override - public int hashCode() { - return (((this.first == null ? 
1 : this.first.hashCode()) * 17) - + (this.second == null ? 1 : this.second.hashCode()) * 19); - } - - @Override - public boolean equals(Object other) { - if (other == null) { - return false; - } - - if (!(other instanceof Pair)) { - return false; - } - - Pair otherPair = (Pair) other; - - if (this.first == null) { - if (otherPair.first != null) { - return false; - } else { - return true; - } - } - - if (this.second == null) { - if (otherPair.second != null) { - return false; - } else { - return true; - } - } - - if (this.first.equals(otherPair.first) && this.second.equals(otherPair.second)) { - return true; - } else { - return false; - } - } -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/data/ReaderWriter.java hcatalog/core/src/main/java/org/apache/hcatalog/data/ReaderWriter.java deleted file mode 100644 index eca4ebb..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/data/ReaderWriter.java +++ /dev/null @@ -1,192 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.data; - -import java.io.DataInput; -import java.io.DataOutput; -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; - -import org.apache.hadoop.io.VIntWritable; -import org.apache.hadoop.io.VLongWritable; - - -public abstract class ReaderWriter { - - private static final String UTF8 = "UTF-8"; - - public static Object readDatum(DataInput in) throws IOException { - - byte type = in.readByte(); - switch (type) { - - case DataType.STRING: - byte[] buffer = new byte[in.readInt()]; - in.readFully(buffer); - return new String(buffer, UTF8); - - case DataType.INTEGER: - VIntWritable vint = new VIntWritable(); - vint.readFields(in); - return vint.get(); - - case DataType.LONG: - VLongWritable vlong = new VLongWritable(); - vlong.readFields(in); - return vlong.get(); - - case DataType.FLOAT: - return in.readFloat(); - - case DataType.DOUBLE: - return in.readDouble(); - - case DataType.BOOLEAN: - return in.readBoolean(); - - case DataType.BYTE: - return in.readByte(); - - case DataType.SHORT: - return in.readShort(); - - case DataType.NULL: - return null; - - case DataType.BINARY: - int len = in.readInt(); - byte[] ba = new byte[len]; - in.readFully(ba); - return ba; - - case DataType.MAP: - int size = in.readInt(); - Map m = new HashMap(size); - for (int i = 0; i < size; i++) { - m.put(readDatum(in), readDatum(in)); - } - return m; - - case DataType.LIST: - int sz = in.readInt(); - List list = new ArrayList(sz); - for (int i = 0; i < sz; i++) { - list.add(readDatum(in)); - } - return list; - - default: - throw new IOException("Unexpected data type " + type + - " found in stream."); - } - } - - public static void writeDatum(DataOutput out, Object val) throws IOException { - // write the data type - byte type = DataType.findType(val); - switch (type) { - case DataType.LIST: - out.writeByte(DataType.LIST); - 
List list = (List) val; - int sz = list.size(); - out.writeInt(sz); - for (int i = 0; i < sz; i++) { - writeDatum(out, list.get(i)); - } - return; - - case DataType.MAP: - out.writeByte(DataType.MAP); - Map m = (Map) val; - out.writeInt(m.size()); - Iterator i = - m.entrySet().iterator(); - while (i.hasNext()) { - Entry entry = (Entry) i.next(); - writeDatum(out, entry.getKey()); - writeDatum(out, entry.getValue()); - } - return; - - case DataType.INTEGER: - out.writeByte(DataType.INTEGER); - new VIntWritable((Integer) val).write(out); - return; - - case DataType.LONG: - out.writeByte(DataType.LONG); - new VLongWritable((Long) val).write(out); - return; - - case DataType.FLOAT: - out.writeByte(DataType.FLOAT); - out.writeFloat((Float) val); - return; - - case DataType.DOUBLE: - out.writeByte(DataType.DOUBLE); - out.writeDouble((Double) val); - return; - - case DataType.BOOLEAN: - out.writeByte(DataType.BOOLEAN); - out.writeBoolean((Boolean) val); - return; - - case DataType.BYTE: - out.writeByte(DataType.BYTE); - out.writeByte((Byte) val); - return; - - case DataType.SHORT: - out.writeByte(DataType.SHORT); - out.writeShort((Short) val); - return; - - case DataType.STRING: - String s = (String) val; - byte[] utfBytes = s.getBytes(ReaderWriter.UTF8); - out.writeByte(DataType.STRING); - out.writeInt(utfBytes.length); - out.write(utfBytes); - return; - - case DataType.BINARY: - byte[] ba = (byte[]) val; - out.writeByte(DataType.BINARY); - out.writeInt(ba.length); - out.write(ba); - return; - - case DataType.NULL: - out.writeByte(DataType.NULL); - return; - - default: - throw new IOException("Unexpected data type " + type + - " found in stream."); - } - } -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/data/schema/HCatFieldSchema.java hcatalog/core/src/main/java/org/apache/hcatalog/data/schema/HCatFieldSchema.java deleted file mode 100644 index a869d9f..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/data/schema/HCatFieldSchema.java +++ 
/dev/null @@ -1,292 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.data.schema; - -import java.io.Serializable; - -import org.apache.commons.lang.builder.ToStringBuilder; -import org.apache.hcatalog.common.HCatException; - -public class HCatFieldSchema implements Serializable { - - public enum Type { - INT, - TINYINT, - SMALLINT, - BIGINT, - BOOLEAN, - FLOAT, - DOUBLE, - STRING, - ARRAY, - MAP, - STRUCT, - BINARY, - } - - public enum Category { - PRIMITIVE, - ARRAY, - MAP, - STRUCT; - - public static Category fromType(Type type) { - if (Type.ARRAY == type) { - return ARRAY; - } else if (Type.STRUCT == type) { - return STRUCT; - } else if (Type.MAP == type) { - return MAP; - } else { - return PRIMITIVE; - } - } - } - - ; - - public boolean isComplex() { - return (category == Category.PRIMITIVE) ? false : true; - } - - /** - * - */ - private static final long serialVersionUID = 1L; - - String fieldName = null; - String comment = null; - Type type = null; - Category category = null; - - // Populated if column is struct, array or map types. - // If struct type, contains schema of the struct. - // If array type, contains schema of one of the elements. 
- // If map type, contains schema of the value element. - HCatSchema subSchema = null; - - // populated if column is Map type - Type mapKeyType = null; - - private String typeString = null; - - @SuppressWarnings("unused") - private HCatFieldSchema() { - // preventing empty ctor from being callable - } - - /** - * Returns type of the field - * @return type of the field - */ - public Type getType() { - return type; - } - - /** - * Returns category of the field - * @return category of the field - */ - public Category getCategory() { - return category; - } - - /** - * Returns name of the field - * @return name of the field - */ - public String getName() { - return fieldName; - } - - public String getComment() { - return comment; - } - - /** - * Constructor constructing a primitive datatype HCatFieldSchema - * @param fieldName Name of the primitive field - * @param type Type of the primitive field - * @throws HCatException if call made on non-primitive types - */ - public HCatFieldSchema(String fieldName, Type type, String comment) throws HCatException { - assertTypeInCategory(type, Category.PRIMITIVE, fieldName); - this.fieldName = fieldName; - this.type = type; - this.category = Category.PRIMITIVE; - this.comment = comment; - } - - /** - * Constructor for constructing a ARRAY type or STRUCT type HCatFieldSchema, passing type and subschema - * @param fieldName Name of the array or struct field - * @param type Type of the field - either Type.ARRAY or Type.STRUCT - * @param subSchema - subschema of the struct, or element schema of the elements in the array - * @throws HCatException if call made on Primitive or Map types - */ - public HCatFieldSchema(String fieldName, Type type, HCatSchema subSchema, String comment) throws HCatException { - assertTypeNotInCategory(type, Category.PRIMITIVE); - assertTypeNotInCategory(type, Category.MAP); - this.fieldName = fieldName; - this.type = type; - this.category = Category.fromType(type); - this.subSchema = subSchema; - if (type == 
Type.ARRAY) { - this.subSchema.get(0).setName(null); - } - this.comment = comment; - } - - private void setName(String name) { - this.fieldName = name; - } - - /** - * Constructor for constructing a MAP type HCatFieldSchema, passing type of key and value - * @param fieldName Name of the array or struct field - * @param type Type of the field - must be Type.MAP - * @param mapKeyType - key type of the Map - * @param mapValueSchema - subschema of the value of the Map - * @throws HCatException if call made on non-Map types - */ - public HCatFieldSchema(String fieldName, Type type, Type mapKeyType, HCatSchema mapValueSchema, String comment) throws HCatException { - assertTypeInCategory(type, Category.MAP, fieldName); - assertTypeInCategory(mapKeyType, Category.PRIMITIVE, fieldName); - this.fieldName = fieldName; - this.type = Type.MAP; - this.category = Category.MAP; - this.mapKeyType = mapKeyType; - this.subSchema = mapValueSchema; - this.subSchema.get(0).setName(null); - this.comment = comment; - } - - public HCatSchema getStructSubSchema() throws HCatException { - assertTypeInCategory(this.type, Category.STRUCT, this.fieldName); - return subSchema; - } - - public HCatSchema getArrayElementSchema() throws HCatException { - assertTypeInCategory(this.type, Category.ARRAY, this.fieldName); - return subSchema; - } - - public Type getMapKeyType() throws HCatException { - assertTypeInCategory(this.type, Category.MAP, this.fieldName); - return mapKeyType; - } - - public HCatSchema getMapValueSchema() throws HCatException { - assertTypeInCategory(this.type, Category.MAP, this.fieldName); - return subSchema; - } - - private static void assertTypeInCategory(Type type, Category category, String fieldName) throws HCatException { - Category typeCategory = Category.fromType(type); - if (typeCategory != category) { - throw new HCatException("Type category mismatch. 
Expected " + category + " but type " + type + " in category " + typeCategory + " (field " + fieldName + ")"); - } - } - - private static void assertTypeNotInCategory(Type type, Category category) throws HCatException { - Category typeCategory = Category.fromType(type); - if (typeCategory == category) { - throw new HCatException("Type category mismatch. Expected type " + type + " not in category " + category + " but was so."); - } - } - - @Override - public String toString() { - return new ToStringBuilder(this) - .append("fieldName", fieldName) - .append("comment", comment) - .append("type", getTypeString()) - .append("category", category) - .toString(); - } - - public String getTypeString() { - if (typeString != null) { - return typeString; - } - - StringBuilder sb = new StringBuilder(); - if (Category.PRIMITIVE == category) { - sb.append(type); - } else if (Category.STRUCT == category) { - sb.append("struct<"); - sb.append(subSchema.getSchemaAsTypeString()); - sb.append(">"); - } else if (Category.ARRAY == category) { - sb.append("array<"); - sb.append(subSchema.getSchemaAsTypeString()); - sb.append(">"); - } else if (Category.MAP == category) { - sb.append("map<"); - sb.append(mapKeyType); - sb.append(","); - sb.append(subSchema.getSchemaAsTypeString()); - sb.append(">"); - } - return (typeString = sb.toString().toLowerCase()); - } - - @Override - public boolean equals(Object obj) { - if (this == obj) { - return true; - } - if (obj == null) { - return false; - } - if (!(obj instanceof HCatFieldSchema)) { - return false; - } - HCatFieldSchema other = (HCatFieldSchema) obj; - if (category != other.category) { - return false; - } - if (fieldName == null) { - if (other.fieldName != null) { - return false; - } - } else if (!fieldName.equals(other.fieldName)) { - return false; - } - if (this.getTypeString() == null) { - if (other.getTypeString() != null) { - return false; - } - } else if (!this.getTypeString().equals(other.getTypeString())) { - return false; - } - 
return true; - } - - @Override - public int hashCode() { - //result could be cached if this object were to be made immutable... - int result = 17; - result = 31 * result + (category == null ? 0 : category.hashCode()); - result = 31 * result + (fieldName == null ? 0 : fieldName.hashCode()); - result = 31 * result + (getTypeString() == null ? 0 : - getTypeString().hashCode()); - return result; - } -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/data/schema/HCatSchema.java hcatalog/core/src/main/java/org/apache/hcatalog/data/schema/HCatSchema.java deleted file mode 100644 index 62cad18..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/data/schema/HCatSchema.java +++ /dev/null @@ -1,185 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.data.schema; - -import java.io.Serializable; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import org.apache.hcatalog.common.HCatException; - -/** - * HCatSchema. This class is NOT thread-safe. 
- */ - -public class HCatSchema implements Serializable { - - private static final long serialVersionUID = 1L; - - private final List fieldSchemas; - //HCatFieldSchema.getName()->position - private final Map fieldPositionMap; - private final List fieldNames; - - /** - * - * @param fieldSchemas is now owned by HCatSchema. Any subsequent modifications - * on fieldSchemas won't get reflected in HCatSchema. Each fieldSchema's name - * in the list must be unique, otherwise throws IllegalArgumentException. - */ - public HCatSchema(final List fieldSchemas) { - this.fieldSchemas = new ArrayList(fieldSchemas); - int idx = 0; - fieldPositionMap = new HashMap(); - fieldNames = new ArrayList(); - for (HCatFieldSchema field : fieldSchemas) { - if (field == null) - throw new IllegalArgumentException("Field cannot be null"); - - String fieldName = field.getName(); - if (fieldPositionMap.containsKey(fieldName)) - throw new IllegalArgumentException("Field named " + fieldName + - " already exists"); - fieldPositionMap.put(fieldName, idx); - fieldNames.add(fieldName); - idx++; - } - } - - public void append(final HCatFieldSchema hfs) throws HCatException { - if (hfs == null) - throw new HCatException("Attempt to append null HCatFieldSchema in HCatSchema."); - - String fieldName = hfs.getName(); - if (fieldPositionMap.containsKey(fieldName)) - throw new HCatException("Attempt to append HCatFieldSchema with already " + - "existing name: " + fieldName + "."); - - this.fieldSchemas.add(hfs); - this.fieldNames.add(fieldName); - this.fieldPositionMap.put(fieldName, this.size() - 1); - } - - /** - * Users are not allowed to modify the list directly, since HCatSchema - * maintains internal state. Use append/remove to modify the schema. - */ - public List getFields() { - return Collections.unmodifiableList(this.fieldSchemas); - } - - /** - * @param fieldName - * @return the index of field named fieldName in Schema. If field is not - * present, returns null. 
- */ - public Integer getPosition(String fieldName) { - return fieldPositionMap.get(fieldName); - } - - public HCatFieldSchema get(String fieldName) throws HCatException { - return get(getPosition(fieldName)); - } - - public List getFieldNames() { - return this.fieldNames; - } - - public HCatFieldSchema get(int position) { - return fieldSchemas.get(position); - } - - public int size() { - return fieldSchemas.size(); - } - - public void remove(final HCatFieldSchema hcatFieldSchema) throws HCatException { - - if (!fieldSchemas.contains(hcatFieldSchema)) { - throw new HCatException("Attempt to delete a non-existent column from HCat Schema: " + hcatFieldSchema); - } - - fieldSchemas.remove(hcatFieldSchema); - fieldPositionMap.remove(hcatFieldSchema.getName()); - fieldNames.remove(hcatFieldSchema.getName()); - } - - @Override - public String toString() { - boolean first = true; - StringBuilder sb = new StringBuilder(); - for (HCatFieldSchema hfs : fieldSchemas) { - if (!first) { - sb.append(","); - } else { - first = false; - } - if (hfs.getName() != null) { - sb.append(hfs.getName()); - sb.append(":"); - } - sb.append(hfs.toString()); - } - return sb.toString(); - } - - public String getSchemaAsTypeString() { - boolean first = true; - StringBuilder sb = new StringBuilder(); - for (HCatFieldSchema hfs : fieldSchemas) { - if (!first) { - sb.append(","); - } else { - first = false; - } - if (hfs.getName() != null) { - sb.append(hfs.getName()); - sb.append(":"); - } - sb.append(hfs.getTypeString()); - } - return sb.toString(); - } - - @Override - public boolean equals(Object obj) { - if (this == obj) { - return true; - } - if (obj == null) { - return false; - } - if (!(obj instanceof HCatSchema)) { - return false; - } - HCatSchema other = (HCatSchema) obj; - if (!this.getFields().equals(other.getFields())) { - return false; - } - return true; - } - - @Override - public int hashCode() { - return toString().hashCode(); - } -} diff --git 
hcatalog/core/src/main/java/org/apache/hcatalog/data/schema/HCatSchemaUtils.java hcatalog/core/src/main/java/org/apache/hcatalog/data/schema/HCatSchemaUtils.java deleted file mode 100644 index b3ea7b0..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/data/schema/HCatSchemaUtils.java +++ /dev/null @@ -1,229 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.hcatalog.data.schema; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; - -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.metastore.api.Schema; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; -import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; -import org.apache.hcatalog.common.HCatException; -import org.apache.hcatalog.data.schema.HCatFieldSchema.Type; - - -public class HCatSchemaUtils { - - public static CollectionBuilder getStructSchemaBuilder() { - return new CollectionBuilder(); - } - - public static CollectionBuilder getListSchemaBuilder() { - return new CollectionBuilder(); - } - - public static MapBuilder getMapSchemaBuilder() { - return new MapBuilder(); - } - - - public static abstract class HCatSchemaBuilder { - public abstract HCatSchema build() throws HCatException; - } - - public static class CollectionBuilder extends HCatSchemaBuilder { // for STRUCTS(multiple-add-calls) and LISTS(single-add-call) - List fieldSchemas = null; - - CollectionBuilder() { - fieldSchemas = new ArrayList(); - } - - public CollectionBuilder addField(FieldSchema fieldSchema) throws HCatException { - return this.addField(getHCatFieldSchema(fieldSchema)); - } - - public CollectionBuilder addField(HCatFieldSchema fieldColumnSchema) { - fieldSchemas.add(fieldColumnSchema); - return this; - } - - @Override - public HCatSchema build() throws HCatException { - return new HCatSchema(fieldSchemas); - } - - } - - public static class MapBuilder extends HCatSchemaBuilder { - - Type keyType = null; - HCatSchema valueSchema = null; - - @Override - 
public HCatSchema build() throws HCatException { - List fslist = new ArrayList(); - fslist.add(new HCatFieldSchema(null, Type.MAP, keyType, valueSchema, null)); - return new HCatSchema(fslist); - } - - public MapBuilder withValueSchema(HCatSchema valueSchema) { - this.valueSchema = valueSchema; - return this; - } - - public MapBuilder withKeyType(Type keyType) { - this.keyType = keyType; - return this; - } - - } - - - /** - * Convert a HCatFieldSchema to a FieldSchema - * @param fs FieldSchema to convert - * @return HCatFieldSchema representation of FieldSchema - * @throws HCatException - */ - public static HCatFieldSchema getHCatFieldSchema(FieldSchema fs) throws HCatException { - String fieldName = fs.getName(); - TypeInfo baseTypeInfo = TypeInfoUtils.getTypeInfoFromTypeString(fs.getType()); - return getHCatFieldSchema(fieldName, baseTypeInfo); - } - - private static HCatFieldSchema getHCatFieldSchema(String fieldName, TypeInfo fieldTypeInfo) throws HCatException { - Category typeCategory = fieldTypeInfo.getCategory(); - HCatFieldSchema hCatFieldSchema; - if (Category.PRIMITIVE == typeCategory) { - hCatFieldSchema = new HCatFieldSchema(fieldName, getPrimitiveHType(fieldTypeInfo), null); - } else if (Category.STRUCT == typeCategory) { - HCatSchema subSchema = constructHCatSchema((StructTypeInfo) fieldTypeInfo); - hCatFieldSchema = new HCatFieldSchema(fieldName, HCatFieldSchema.Type.STRUCT, subSchema, null); - } else if (Category.LIST == typeCategory) { - HCatSchema subSchema = getHCatSchema(((ListTypeInfo) fieldTypeInfo).getListElementTypeInfo()); - hCatFieldSchema = new HCatFieldSchema(fieldName, HCatFieldSchema.Type.ARRAY, subSchema, null); - } else if (Category.MAP == typeCategory) { - HCatFieldSchema.Type mapKeyType = getPrimitiveHType(((MapTypeInfo) fieldTypeInfo).getMapKeyTypeInfo()); - HCatSchema subSchema = getHCatSchema(((MapTypeInfo) fieldTypeInfo).getMapValueTypeInfo()); - hCatFieldSchema = new HCatFieldSchema(fieldName, HCatFieldSchema.Type.MAP, 
mapKeyType, subSchema, null); - } else { - throw new TypeNotPresentException(fieldTypeInfo.getTypeName(), null); - } - return hCatFieldSchema; - } - - private static Type getPrimitiveHType(TypeInfo basePrimitiveTypeInfo) { - switch (((PrimitiveTypeInfo) basePrimitiveTypeInfo).getPrimitiveCategory()) { - case BOOLEAN: - return Type.BOOLEAN; - case BYTE: - return Type.TINYINT; - case DOUBLE: - return Type.DOUBLE; - case FLOAT: - return Type.FLOAT; - case INT: - return Type.INT; - case LONG: - return Type.BIGINT; - case SHORT: - return Type.SMALLINT; - case STRING: - return Type.STRING; - case BINARY: - return Type.BINARY; - default: - throw new TypeNotPresentException(((PrimitiveTypeInfo) basePrimitiveTypeInfo).getTypeName(), null); - } - } - - public static HCatSchema getHCatSchema(Schema schema) throws HCatException { - return getHCatSchema(schema.getFieldSchemas()); - } - - public static HCatSchema getHCatSchema(List fslist) throws HCatException { - CollectionBuilder builder = getStructSchemaBuilder(); - for (FieldSchema fieldSchema : fslist) { - builder.addField(fieldSchema); - } - return builder.build(); - } - - private static HCatSchema constructHCatSchema(StructTypeInfo stypeInfo) throws HCatException { - CollectionBuilder builder = getStructSchemaBuilder(); - for (String fieldName : ((StructTypeInfo) stypeInfo).getAllStructFieldNames()) { - builder.addField(getHCatFieldSchema(fieldName, ((StructTypeInfo) stypeInfo).getStructFieldTypeInfo(fieldName))); - } - return builder.build(); - } - - public static HCatSchema getHCatSchema(TypeInfo typeInfo) throws HCatException { - Category typeCategory = typeInfo.getCategory(); - HCatSchema hCatSchema; - if (Category.PRIMITIVE == typeCategory) { - hCatSchema = getStructSchemaBuilder().addField(new HCatFieldSchema(null, getPrimitiveHType(typeInfo), null)).build(); - } else if (Category.STRUCT == typeCategory) { - HCatSchema subSchema = constructHCatSchema((StructTypeInfo) typeInfo); - hCatSchema = 
getStructSchemaBuilder().addField(new HCatFieldSchema(null, Type.STRUCT, subSchema, null)).build(); - } else if (Category.LIST == typeCategory) { - CollectionBuilder builder = getListSchemaBuilder(); - builder.addField(getHCatFieldSchema(null, ((ListTypeInfo) typeInfo).getListElementTypeInfo())); - hCatSchema = new HCatSchema(Arrays.asList(new HCatFieldSchema("", Type.ARRAY, builder.build(), ""))); - } else if (Category.MAP == typeCategory) { - HCatFieldSchema.Type mapKeyType = getPrimitiveHType(((MapTypeInfo) typeInfo).getMapKeyTypeInfo()); - HCatSchema subSchema = getHCatSchema(((MapTypeInfo) typeInfo).getMapValueTypeInfo()); - MapBuilder builder = getMapSchemaBuilder(); - hCatSchema = builder.withKeyType(mapKeyType).withValueSchema(subSchema).build(); - } else { - throw new TypeNotPresentException(typeInfo.getTypeName(), null); - } - return hCatSchema; - } - - public static HCatSchema getHCatSchemaFromTypeString(String typeString) throws HCatException { - return getHCatSchema(TypeInfoUtils.getTypeInfoFromTypeString(typeString)); - } - - public static HCatSchema getHCatSchema(String schemaString) throws HCatException { - if ((schemaString == null) || (schemaString.trim().isEmpty())) { - return new HCatSchema(new ArrayList()); // empty HSchema construct - } - HCatSchema outerSchema = getHCatSchemaFromTypeString("struct<" + schemaString + ">"); - return outerSchema.get(0).getStructSubSchema(); - } - - public static FieldSchema getFieldSchema(HCatFieldSchema hcatFieldSchema) { - return new FieldSchema(hcatFieldSchema.getName(), hcatFieldSchema.getTypeString(), hcatFieldSchema.getComment()); - } - - public static List getFieldSchemas(List hcatFieldSchemas) { - List lfs = new ArrayList(); - for (HCatFieldSchema hfs : hcatFieldSchemas) { - lfs.add(getFieldSchema(hfs)); - } - return lfs; - } -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/DataTransferFactory.java 
hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/DataTransferFactory.java deleted file mode 100644 index d0b9cf2..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/DataTransferFactory.java +++ /dev/null @@ -1,136 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.data.transfer; - -import java.util.Map; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.mapreduce.InputSplit; -import org.apache.hcatalog.data.transfer.impl.HCatInputFormatReader; -import org.apache.hcatalog.data.transfer.impl.HCatOutputFormatWriter; -import org.apache.hcatalog.data.transfer.state.DefaultStateProvider; -import org.apache.hcatalog.data.transfer.state.StateProvider; - -/** - * Use this factory to get instances of {@link HCatReader} or {@link HCatWriter} - * at master and slave nodes. - */ - -public class DataTransferFactory { - - /** - * This should be called once from master node to obtain an instance of - * {@link HCatReader}. 
- * - * @param re - * ReadEntity built using {@link ReadEntity.Builder} - * @param config - * any configuration which master node wants to pass to HCatalog - * @return {@link HCatReader} - */ - public static HCatReader getHCatReader(final ReadEntity re, - final Map config) { - // In future, this may examine ReadEntity and/or config to return - // appropriate HCatReader - return new HCatInputFormatReader(re, config); - } - - /** - * This should only be called once from every slave node to obtain an instance - * of {@link HCatReader}. - * - * @param split - * input split obtained at master node - * @param config - * configuration obtained at master node - * @return {@link HCatReader} - */ - public static HCatReader getHCatReader(final InputSplit split, - final Configuration config) { - // In future, this may examine config to return appropriate HCatReader - return getHCatReader(split, config, DefaultStateProvider.get()); - } - - /** - * This should only be called once from every slave node to obtain an instance - * of {@link HCatReader}. This should be called if an external system has some - * state to provide to HCatalog. - * - * @param split - * input split obtained at master node - * @param config - * configuration obtained at master node - * @param sp - * {@link StateProvider} - * @return {@link HCatReader} - */ - public static HCatReader getHCatReader(final InputSplit split, - final Configuration config, StateProvider sp) { - // In future, this may examine config to return appropriate HCatReader - return new HCatInputFormatReader(split, config, sp); - } - - /** - * This should be called at master node to obtain an instance of - * {@link HCatWriter}. 
- * - * @param we - * WriteEntity built using {@link WriteEntity.Builder} - * @param config - * any configuration which master wants to pass to HCatalog - * @return {@link HCatWriter} - */ - public static HCatWriter getHCatWriter(final WriteEntity we, - final Map config) { - // In future, this may examine WriteEntity and/or config to return - // appropriate HCatWriter - return new HCatOutputFormatWriter(we, config); - } - - /** - * This should be called at slave nodes to obtain an instance of - * {@link HCatWriter}. - * - * @param cntxt - * {@link WriterContext} obtained at master node - * @return {@link HCatWriter} - */ - public static HCatWriter getHCatWriter(final WriterContext cntxt) { - // In future, this may examine context to return appropriate HCatWriter - return getHCatWriter(cntxt, DefaultStateProvider.get()); - } - - /** - * This should be called at slave nodes to obtain an instance of - * {@link HCatWriter}. If an external system has some mechanism for providing - * state to HCatalog, this constructor can be used. - * - * @param cntxt - * {@link WriterContext} obtained at master node - * @param sp - * {@link StateProvider} - * @return {@link HCatWriter} - */ - public static HCatWriter getHCatWriter(final WriterContext cntxt, - final StateProvider sp) { - // In future, this may examine context to return appropriate HCatWriter - return new HCatOutputFormatWriter(cntxt.getConf(), sp); - } -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/EntityBase.java hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/EntityBase.java deleted file mode 100644 index 9104391..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/EntityBase.java +++ /dev/null @@ -1,60 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.data.transfer; - -import java.util.Map; - -/** - * This is a base class for - * {@link ReadEntity.Builder} / {@link WriteEntity.Builder}. - * Many fields in them are common, so this class - * contains the common fields. - */ - -abstract class EntityBase { - - String region; - String tableName; - String dbName; - Map partitionKVs; - - /** - * Common methods for {@link ReadEntity} and {@link WriteEntity} - */ - - abstract static class Entity extends EntityBase { - - public String getRegion() { - return region; - } - - public String getTableName() { - return tableName; - } - - public String getDbName() { - return dbName; - } - - public Map getPartitionKVs() { - return partitionKVs; - } - } -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/HCatReader.java hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/HCatReader.java deleted file mode 100644 index d41bac8..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/HCatReader.java +++ /dev/null @@ -1,102 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.data.transfer; - -import java.util.Iterator; -import java.util.Map; -import java.util.Map.Entry; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hcatalog.common.HCatException; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.hcatalog.data.transfer.state.StateProvider; - -/** - * This abstract class is internal to HCatalog and abstracts away the notion of - * underlying system from which reads will be done. - */ - -public abstract class HCatReader { - - /** - * This should be called at master node to obtain {@link ReaderContext} which - * then should be serialized and sent to slave nodes. - * - * @return {@link ReaderContext} - * @throws HCatException - */ - public abstract ReaderContext prepareRead() throws HCatException; - - /** - * This should be called at slave nodes to read {@link HCatRecord}s - * - * @return {@link Iterator} of {@link HCatRecord} - * @throws HCatException - */ - public abstract Iterator read() throws HCatException; - - /** - * This constructor will be invoked by {@link DataTransferFactory} at master - * node. Don't use this constructor. 
Instead, use {@link DataTransferFactory} - * - * @param re - * @param config - */ - protected HCatReader(final ReadEntity re, final Map config) { - this(config); - this.re = re; - } - - /** - * This constructor will be invoked by {@link DataTransferFactory} at slave - * nodes. Don't use this constructor. Instead, use {@link DataTransferFactory} - * - * @param config - * @param sp - */ - - protected HCatReader(final Configuration config, StateProvider sp) { - this.conf = config; - this.sp = sp; - } - - protected ReadEntity re; // This will be null at slaves. - protected Configuration conf; - protected ReaderContext info; - protected StateProvider sp; // This will be null at master. - - private HCatReader(final Map config) { - Configuration conf = new Configuration(); - if (null != config) { - for (Entry kv : config.entrySet()) { - conf.set(kv.getKey(), kv.getValue()); - } - } - this.conf = conf; - } - - public Configuration getConf() { - if (null == conf) { - throw new IllegalStateException( - "HCatReader is not constructed correctly."); - } - return conf; - } -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/HCatWriter.java hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/HCatWriter.java deleted file mode 100644 index 039196f..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/HCatWriter.java +++ /dev/null @@ -1,114 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.data.transfer; - -import java.util.Iterator; -import java.util.Map; -import java.util.Map.Entry; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hcatalog.common.HCatException; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.hcatalog.data.transfer.state.StateProvider; - -/** - * This abstraction is internal to HCatalog. This is to facilitate writing to - * HCatalog from external systems. Don't try to instantiate this directly. - * Instead, use {@link DataTransferFactory} - */ - -public abstract class HCatWriter { - - protected Configuration conf; - protected WriteEntity we; // This will be null at slave nodes. - protected WriterContext info; - protected StateProvider sp; - - /** - * External system should invoke this method exactly once from a master node. - * - * @return {@link WriterContext} This should be serialized and sent to slave - * nodes to construct HCatWriter there. - * @throws HCatException - */ - public abstract WriterContext prepareWrite() throws HCatException; - - /** - * This method should be used at slave needs to perform writes. - * - * @param recordItr - * {@link Iterator} records to be written into HCatalog. - * @throws {@link HCatException} - */ - public abstract void write(final Iterator recordItr) - throws HCatException; - - /** - * This method should be called at master node. Primary purpose of this is to - * do metadata commit. 
- * - * @throws {@link HCatException} - */ - public abstract void commit(final WriterContext context) throws HCatException; - - /** - * This method should be called at master node. Primary purpose of this is to - * do cleanups in case of failures. - * - * @throws {@link HCatException} * - */ - public abstract void abort(final WriterContext context) throws HCatException; - - /** - * This constructor will be used at master node - * - * @param we - * WriteEntity defines where in storage records should be written to. - * @param config - * Any configuration which external system wants to communicate to - * HCatalog for performing writes. - */ - protected HCatWriter(final WriteEntity we, final Map config) { - this(config); - this.we = we; - } - - /** - * This constructor will be used at slave nodes. - * - * @param config - */ - protected HCatWriter(final Configuration config, final StateProvider sp) { - this.conf = config; - this.sp = sp; - } - - private HCatWriter(final Map config) { - Configuration conf = new Configuration(); - if (config != null) { - // user is providing config, so it could be null. - for (Entry kv : config.entrySet()) { - conf.set(kv.getKey(), kv.getValue()); - } - } - - this.conf = conf; - } -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/ReadEntity.java hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/ReadEntity.java deleted file mode 100644 index 5c197bd..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/ReadEntity.java +++ /dev/null @@ -1,89 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.data.transfer; - -import java.util.Map; - -public class ReadEntity extends EntityBase.Entity { - - private String filterString; - - /** - * Don't instantiate {@link ReadEntity} directly. Use, - * {@link ReadEntity.Builder} instead. - * - */ - private ReadEntity() { - // Not allowed - } - - private ReadEntity(Builder builder) { - - this.region = builder.region; - this.dbName = builder.dbName; - this.tableName = builder.tableName; - this.partitionKVs = builder.partitionKVs; - this.filterString = builder.filterString; - } - - public String getFilterString() { - return this.filterString; - } - - /** - * This class should be used to build {@link ReadEntity}. It follows builder - * pattern, letting you build your {@link ReadEntity} with whatever level of - * detail you want. 
- * - */ - public static class Builder extends EntityBase { - - private String filterString; - - public Builder withRegion(final String region) { - this.region = region; - return this; - } - - public Builder withDatabase(final String dbName) { - this.dbName = dbName; - return this; - } - - public Builder withTable(final String tblName) { - this.tableName = tblName; - return this; - } - - public Builder withPartition(final Map partKVs) { - this.partitionKVs = partKVs; - return this; - } - - public Builder withFilter(String filterString) { - this.filterString = filterString; - return this; - } - - public ReadEntity build() { - return new ReadEntity(this); - } - } -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/ReaderContext.java hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/ReaderContext.java deleted file mode 100644 index 44e9e35..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/ReaderContext.java +++ /dev/null @@ -1,89 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.data.transfer; - -import java.io.Externalizable; -import java.io.IOException; -import java.io.ObjectInput; -import java.io.ObjectOutput; -import java.util.ArrayList; -import java.util.List; - -import org.apache.hadoop.conf.Configurable; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.mapreduce.InputSplit; -import org.apache.hcatalog.mapreduce.HCatSplit; - -/** - * This class will contain information of different {@link InputSplit} obtained - * at master node and configuration. This class implements - * {@link Externalizable} so it can be serialized using standard java - * mechanisms. - */ -public class ReaderContext implements Externalizable, Configurable { - - private static final long serialVersionUID = -2656468331739574367L; - private List splits; - private Configuration conf; - - public ReaderContext() { - this.splits = new ArrayList(); - this.conf = new Configuration(); - } - - public void setInputSplits(final List splits) { - this.splits = splits; - } - - public List getSplits() { - return splits; - } - - @Override - public Configuration getConf() { - return conf; - } - - @Override - public void setConf(final Configuration config) { - conf = config; - } - - @Override - public void writeExternal(ObjectOutput out) throws IOException { - conf.write(out); - out.writeInt(splits.size()); - for (InputSplit split : splits) { - ((HCatSplit) split).write(out); - } - } - - @Override - public void readExternal(ObjectInput in) throws IOException, - ClassNotFoundException { - conf.readFields(in); - int numOfSplits = in.readInt(); - for (int i = 0; i < numOfSplits; i++) { - HCatSplit split = new HCatSplit(); - split.readFields(in); - splits.add(split); - } - } -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/WriteEntity.java hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/WriteEntity.java deleted file mode 100644 index 2b9ea3d..0000000 --- 
hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/WriteEntity.java +++ /dev/null @@ -1,75 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.data.transfer; - -import java.util.Map; - -public class WriteEntity extends EntityBase.Entity { - - /** - * Don't instantiate {@link WriteEntity} directly. Use, {@link Builder} to - * build {@link WriteEntity}. - */ - - private WriteEntity() { - // Not allowed. - } - - private WriteEntity(Builder builder) { - this.region = builder.region; - this.dbName = builder.dbName; - this.tableName = builder.tableName; - this.partitionKVs = builder.partitionKVs; - } - - /** - * This class should be used to build {@link WriteEntity}. It follows builder - * pattern, letting you build your {@link WriteEntity} with whatever level of - * detail you want. 
- * - */ - public static class Builder extends EntityBase { - - public Builder withRegion(final String region) { - this.region = region; - return this; - } - - public Builder withDatabase(final String dbName) { - this.dbName = dbName; - return this; - } - - public Builder withTable(final String tblName) { - this.tableName = tblName; - return this; - } - - public Builder withPartition(final Map partKVs) { - this.partitionKVs = partKVs; - return this; - } - - public WriteEntity build() { - return new WriteEntity(this); - } - - } -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/WriterContext.java hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/WriterContext.java deleted file mode 100644 index a3fc07b..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/WriterContext.java +++ /dev/null @@ -1,65 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.data.transfer; - -import java.io.Externalizable; -import java.io.IOException; -import java.io.ObjectInput; -import java.io.ObjectOutput; - -import org.apache.hadoop.conf.Configurable; -import org.apache.hadoop.conf.Configuration; - -/** - * This contains information obtained at master node to help prepare slave nodes - * for writer. This class implements {@link Externalizable} so it can be - * serialized using standard java mechanisms. Master should serialize it and - * make it available to slaves to prepare for writes. - */ -public class WriterContext implements Externalizable, Configurable { - - private static final long serialVersionUID = -5899374262971611840L; - private Configuration conf; - - public WriterContext() { - conf = new Configuration(); - } - - @Override - public Configuration getConf() { - return conf; - } - - @Override - public void setConf(final Configuration config) { - this.conf = config; - } - - @Override - public void writeExternal(ObjectOutput out) throws IOException { - conf.write(out); - } - - @Override - public void readExternal(ObjectInput in) throws IOException, - ClassNotFoundException { - conf.readFields(in); - } -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/impl/HCatInputFormatReader.java hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/impl/HCatInputFormatReader.java deleted file mode 100644 index 408611b..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/impl/HCatInputFormatReader.java +++ /dev/null @@ -1,137 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.data.transfer.impl; - -import java.io.IOException; -import java.util.Iterator; -import java.util.Map; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.shims.ShimLoader; -import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.mapreduce.InputSplit; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.RecordReader; -import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.hadoop.mapreduce.TaskAttemptID; -import org.apache.hcatalog.common.ErrorType; -import org.apache.hcatalog.common.HCatException; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.hcatalog.data.transfer.HCatReader; -import org.apache.hcatalog.data.transfer.ReadEntity; -import org.apache.hcatalog.data.transfer.ReaderContext; -import org.apache.hcatalog.data.transfer.state.StateProvider; -import org.apache.hcatalog.mapreduce.HCatInputFormat; - -/** - * This reader reads via {@link HCatInputFormat} - * - */ -public class HCatInputFormatReader extends HCatReader { - - private InputSplit split; - - public HCatInputFormatReader(InputSplit split, Configuration config, - StateProvider sp) { - super(config, sp); - this.split = split; - } - - public HCatInputFormatReader(ReadEntity info, Map config) { - super(info, config); - } - - @Override - public ReaderContext prepareRead() throws HCatException { - try { - Job job = new Job(conf); - HCatInputFormat hcif = HCatInputFormat.setInput( - job, re.getDbName(), re.getTableName()).setFilter(re.getFilterString()); 
- ReaderContext cntxt = new ReaderContext(); - cntxt.setInputSplits(hcif.getSplits( - ShimLoader.getHadoopShims().getHCatShim().createJobContext(job.getConfiguration(), null))); - cntxt.setConf(job.getConfiguration()); - return cntxt; - } catch (IOException e) { - throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e); - } catch (InterruptedException e) { - throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e); - } - } - - @Override - public Iterator read() throws HCatException { - - HCatInputFormat inpFmt = new HCatInputFormat(); - RecordReader rr; - try { - TaskAttemptContext cntxt = ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(conf, new TaskAttemptID()); - rr = inpFmt.createRecordReader(split, cntxt); - rr.initialize(split, cntxt); - } catch (IOException e) { - throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e); - } catch (InterruptedException e) { - throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e); - } - return new HCatRecordItr(rr); - } - - private static class HCatRecordItr implements Iterator { - - private RecordReader curRecReader; - - HCatRecordItr(RecordReader rr) { - curRecReader = rr; - } - - @Override - public boolean hasNext() { - try { - boolean retVal = curRecReader.nextKeyValue(); - if (retVal) { - return true; - } - // if its false, we need to close recordReader. 
- curRecReader.close(); - return false; - } catch (IOException e) { - throw new RuntimeException(e); - } catch (InterruptedException e) { - throw new RuntimeException(e); - } - } - - @Override - public HCatRecord next() { - try { - return curRecReader.getCurrentValue(); - } catch (IOException e) { - throw new RuntimeException(e); - } catch (InterruptedException e) { - throw new RuntimeException(e); - } - } - - @Override - public void remove() { - throw new UnsupportedOperationException("Not allowed"); - } - } -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/impl/HCatOutputFormatWriter.java hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/impl/HCatOutputFormatWriter.java deleted file mode 100644 index 086d3b2..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/impl/HCatOutputFormatWriter.java +++ /dev/null @@ -1,162 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.data.transfer.impl; - -import java.io.IOException; -import java.util.Iterator; -import java.util.Map; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.shims.ShimLoader; -import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.JobStatus.State; -import org.apache.hadoop.mapreduce.OutputCommitter; -import org.apache.hadoop.mapreduce.RecordWriter; -import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.hadoop.mapreduce.TaskAttemptID; -import org.apache.hcatalog.common.ErrorType; -import org.apache.hcatalog.common.HCatException; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.hcatalog.data.transfer.HCatWriter; -import org.apache.hcatalog.data.transfer.WriteEntity; -import org.apache.hcatalog.data.transfer.WriterContext; -import org.apache.hcatalog.data.transfer.state.StateProvider; -import org.apache.hcatalog.mapreduce.HCatOutputFormat; -import org.apache.hcatalog.mapreduce.OutputJobInfo; - -/** - * This writer writes via {@link HCatOutputFormat} - * - */ -public class HCatOutputFormatWriter extends HCatWriter { - - public HCatOutputFormatWriter(WriteEntity we, Map config) { - super(we, config); - } - - public HCatOutputFormatWriter(Configuration config, StateProvider sp) { - super(config, sp); - } - - @Override - public WriterContext prepareWrite() throws HCatException { - OutputJobInfo jobInfo = OutputJobInfo.create(we.getDbName(), - we.getTableName(), we.getPartitionKVs()); - Job job; - try { - job = new Job(conf); - HCatOutputFormat.setOutput(job, jobInfo); - HCatOutputFormat.setSchema(job, HCatOutputFormat.getTableSchema(job)); - HCatOutputFormat outFormat = new HCatOutputFormat(); - outFormat.checkOutputSpecs(job); - outFormat.getOutputCommitter(ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext( - job.getConfiguration(), 
ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptID())).setupJob(job); - } catch (IOException e) { - throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e); - } catch (InterruptedException e) { - throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e); - } - WriterContext cntxt = new WriterContext(); - cntxt.setConf(job.getConfiguration()); - return cntxt; - } - - @Override - public void write(Iterator recordItr) throws HCatException { - - int id = sp.getId(); - setVarsInConf(id); - HCatOutputFormat outFormat = new HCatOutputFormat(); - TaskAttemptContext cntxt = ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext( - conf, new TaskAttemptID(ShimLoader.getHadoopShims().getHCatShim().createTaskID(), id)); - OutputCommitter committer = null; - RecordWriter, HCatRecord> writer; - try { - committer = outFormat.getOutputCommitter(cntxt); - committer.setupTask(cntxt); - writer = outFormat.getRecordWriter(cntxt); - while (recordItr.hasNext()) { - HCatRecord rec = recordItr.next(); - writer.write(null, rec); - } - writer.close(cntxt); - if (committer.needsTaskCommit(cntxt)) { - committer.commitTask(cntxt); - } - } catch (IOException e) { - if (null != committer) { - try { - committer.abortTask(cntxt); - } catch (IOException e1) { - throw new HCatException(ErrorType.ERROR_INTERNAL_EXCEPTION, e1); - } - } - throw new HCatException("Failed while writing", e); - } catch (InterruptedException e) { - if (null != committer) { - try { - committer.abortTask(cntxt); - } catch (IOException e1) { - throw new HCatException(ErrorType.ERROR_INTERNAL_EXCEPTION, e1); - } - } - throw new HCatException("Failed while writing", e); - } - } - - @Override - public void commit(WriterContext context) throws HCatException { - try { - new HCatOutputFormat().getOutputCommitter(ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext( - context.getConf(), ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptID())) - 
.commitJob(ShimLoader.getHadoopShims().getHCatShim().createJobContext(context.getConf(), null)); - } catch (IOException e) { - throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e); - } catch (InterruptedException e) { - throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e); - } - } - - @Override - public void abort(WriterContext context) throws HCatException { - try { - new HCatOutputFormat().getOutputCommitter(ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext( - context.getConf(), ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptID())) - .abortJob(ShimLoader.getHadoopShims().getHCatShim().createJobContext( - context.getConf(), null), State.FAILED); - } catch (IOException e) { - throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e); - } catch (InterruptedException e) { - throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e); - } - } - - private void setVarsInConf(int id) { - - // Following two config keys are required by FileOutputFormat to work - // correctly. - // In usual case of Hadoop, JobTracker will set these before launching - // tasks. - // Since there is no jobtracker here, we set it ourself. - conf.setInt("mapred.task.partition", id); - conf.set("mapred.task.id", "attempt__0000_r_000000_" + id); - } -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/state/DefaultStateProvider.java hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/state/DefaultStateProvider.java deleted file mode 100644 index bd606e0..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/state/DefaultStateProvider.java +++ /dev/null @@ -1,48 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.data.transfer.state; - -import java.text.NumberFormat; -import java.util.Random; - -public class DefaultStateProvider implements StateProvider { - - /** - * Default implementation. Here, ids are generated randomly. - */ - @Override - public int getId() { - - NumberFormat numberFormat = NumberFormat.getInstance(); - numberFormat.setMinimumIntegerDigits(5); - numberFormat.setGroupingUsed(false); - return Integer - .parseInt(numberFormat.format(Math.abs(new Random().nextInt()))); - } - - private static StateProvider sp; - - public static synchronized StateProvider get() { - if (null == sp) { - sp = new DefaultStateProvider(); - } - return sp; - } -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/state/StateProvider.java hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/state/StateProvider.java deleted file mode 100644 index 6fc7b66..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/data/transfer/state/StateProvider.java +++ /dev/null @@ -1,35 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.data.transfer.state; - -/** - * If external system wants to communicate any state to slaves, they can do so - * via this interface. One example of this in case of Map-Reduce is ids assigned - * by JobTracker to TaskTracker. - */ -public interface StateProvider { - - /** - * This method should return id assigned to slave node. - * - * @return id - */ - public int getId(); -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/har/HarOutputCommitterPostProcessor.java hcatalog/core/src/main/java/org/apache/hcatalog/har/HarOutputCommitterPostProcessor.java deleted file mode 100644 index be5c73a..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/har/HarOutputCommitterPostProcessor.java +++ /dev/null @@ -1,125 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.har; - -import java.io.IOException; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; -import org.apache.hadoop.hive.metastore.api.Partition; -import org.apache.hadoop.mapreduce.JobContext; -import org.apache.hadoop.tools.HadoopArchives; -import org.apache.hadoop.util.ToolRunner; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.common.HCatException; - -public class HarOutputCommitterPostProcessor { - - boolean isEnabled = false; - - public boolean isEnabled() { - return isEnabled; - } - - public void setEnabled(boolean enabled) { - this.isEnabled = enabled; - } - - - public void exec(JobContext context, Partition partition, Path partPath) throws IOException { -// LOG.info("Archiving partition ["+partPath.toString()+"]"); - makeHar(context, partPath.toUri().toString(), harFile(partPath)); - partition.getParameters().put(hive_metastoreConstants.IS_ARCHIVED, "true"); - } - - public String harFile(Path ptnPath) throws IOException { - String harFile = ptnPath.toString().replaceFirst("/+$", "") + ".har"; -// LOG.info("har file : " + harFile); - return harFile; - } - - public String getParentFSPath(Path ptnPath) throws IOException { - return ptnPath.toUri().getPath().replaceFirst("/+$", ""); - } - - public String getProcessedLocation(Path ptnPath) throws IOException { - String harLocn = ("har://" + ptnPath.toUri().getPath()).replaceFirst("/+$", "") + 
".har" + Path.SEPARATOR; -// LOG.info("har location : " + harLocn); - return harLocn; - } - - - /** - * Creates a har file from the contents of a given directory, using that as root. - * @param dir Directory to archive - * @param harFile The HAR file to create - */ - public static void makeHar(JobContext context, String dir, String harFile) throws IOException { -// Configuration conf = context.getConfiguration(); -// Credentials creds = context.getCredentials(); - -// HCatUtil.logAllTokens(LOG,context); - - int lastSep = harFile.lastIndexOf(Path.SEPARATOR_CHAR); - Path archivePath = new Path(harFile.substring(0, lastSep)); - final String[] args = { - "-archiveName", - harFile.substring(lastSep + 1, harFile.length()), - "-p", - dir, - "*", - archivePath.toString() - }; -// for (String arg : args){ -// LOG.info("Args to har : "+ arg); -// } - try { - Configuration newConf = new Configuration(); - FileSystem fs = archivePath.getFileSystem(newConf); - - String hadoopTokenFileLocationEnvSetting = System.getenv(HCatConstants.SYSENV_HADOOP_TOKEN_FILE_LOCATION); - if ((hadoopTokenFileLocationEnvSetting != null) && (!hadoopTokenFileLocationEnvSetting.isEmpty())) { - newConf.set(HCatConstants.CONF_MAPREDUCE_JOB_CREDENTIALS_BINARY, hadoopTokenFileLocationEnvSetting); -// LOG.info("System.getenv(\"HADOOP_TOKEN_FILE_LOCATION\") =["+ System.getenv("HADOOP_TOKEN_FILE_LOCATION")+"]"); - } -// for (FileStatus ds : fs.globStatus(new Path(dir, "*"))){ -// LOG.info("src : "+ds.getPath().toUri().toString()); -// } - - final HadoopArchives har = new HadoopArchives(newConf); - int rc = ToolRunner.run(har, args); - if (rc != 0) { - throw new Exception("Har returned error code " + rc); - } - -// for (FileStatus hs : fs.globStatus(new Path(harFile, "*"))){ -// LOG.info("dest : "+hs.getPath().toUri().toString()); -// } -// doHarCheck(fs,harFile); -// LOG.info("Nuking " + dir); - fs.delete(new Path(dir), true); - } catch (Exception e) { - throw new HCatException("Error creating Har [" + 
harFile + "] from [" + dir + "]", e); - } - } - -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/DefaultOutputCommitterContainer.java hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/DefaultOutputCommitterContainer.java deleted file mode 100644 index 36e8387..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/DefaultOutputCommitterContainer.java +++ /dev/null @@ -1,107 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.mapreduce; - -import java.io.IOException; - -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; -import org.apache.hadoop.mapreduce.JobContext; -import org.apache.hadoop.mapreduce.JobStatus.State; -import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.common.HCatUtil; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Part of the DefaultOutput*Container classes - * See {@link DefaultOutputFormatContainer} for more information - */ -class DefaultOutputCommitterContainer extends OutputCommitterContainer { - - private static final Logger LOG = LoggerFactory.getLogger(DefaultOutputCommitterContainer.class); - - /** - * @param context current JobContext - * @param baseCommitter OutputCommitter to contain - * @throws IOException - */ - public DefaultOutputCommitterContainer(JobContext context, org.apache.hadoop.mapred.OutputCommitter baseCommitter) throws IOException { - super(context, baseCommitter); - } - - @Override - public void abortTask(TaskAttemptContext context) throws IOException { - getBaseOutputCommitter().abortTask(HCatMapRedUtil.createTaskAttemptContext(context)); - } - - @Override - public void commitTask(TaskAttemptContext context) throws IOException { - getBaseOutputCommitter().commitTask(HCatMapRedUtil.createTaskAttemptContext(context)); - } - - @Override - public boolean needsTaskCommit(TaskAttemptContext context) throws IOException { - return getBaseOutputCommitter().needsTaskCommit(HCatMapRedUtil.createTaskAttemptContext(context)); - } - - @Override - public void setupJob(JobContext context) throws IOException { - getBaseOutputCommitter().setupJob(HCatMapRedUtil.createJobContext(context)); - } - - @Override - public void setupTask(TaskAttemptContext context) throws IOException { - getBaseOutputCommitter().setupTask(HCatMapRedUtil.createTaskAttemptContext(context)); 
- } - - @Override - public void abortJob(JobContext jobContext, State state) throws IOException { - getBaseOutputCommitter().abortJob(HCatMapRedUtil.createJobContext(jobContext), state); - cleanupJob(jobContext); - } - - @Override - public void commitJob(JobContext jobContext) throws IOException { - getBaseOutputCommitter().commitJob(HCatMapRedUtil.createJobContext(jobContext)); - cleanupJob(jobContext); - } - - @Override - public void cleanupJob(JobContext context) throws IOException { - getBaseOutputCommitter().cleanupJob(HCatMapRedUtil.createJobContext(context)); - - //Cancel HCat and JobTracker tokens - HiveMetaStoreClient client = null; - try { - HiveConf hiveConf = HCatUtil.getHiveConf(context.getConfiguration()); - client = HCatUtil.getHiveClient(hiveConf); - String tokenStrForm = client.getTokenStrForm(); - if (tokenStrForm != null && context.getConfiguration().get(HCatConstants.HCAT_KEY_TOKEN_SIGNATURE) != null) { - client.cancelDelegationToken(tokenStrForm); - } - } catch (Exception e) { - LOG.warn("Failed to cancel delegation token", e); - } finally { - HCatUtil.closeHiveClientQuietly(client); - } - } -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/DefaultOutputFormatContainer.java hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/DefaultOutputFormatContainer.java deleted file mode 100644 index 2c630b6..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/DefaultOutputFormatContainer.java +++ /dev/null @@ -1,101 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.mapreduce; - -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.io.WritableComparable; - -import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapreduce.JobContext; -import org.apache.hadoop.mapreduce.OutputCommitter; -import org.apache.hadoop.mapreduce.RecordWriter; -import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.hcatalog.common.HCatUtil; -import org.apache.hcatalog.data.HCatRecord; - -import java.io.IOException; -import java.text.NumberFormat; - -/** - * Bare bones implementation of OutputFormatContainer. Does only the required - * tasks to work properly with HCatalog. HCatalog features which require a - * storage specific implementation are unsupported (ie partitioning). - */ -class DefaultOutputFormatContainer extends OutputFormatContainer { - - private static final NumberFormat NUMBER_FORMAT = NumberFormat.getInstance(); - - static { - NUMBER_FORMAT.setMinimumIntegerDigits(5); - NUMBER_FORMAT.setGroupingUsed(false); - } - - public DefaultOutputFormatContainer(org.apache.hadoop.mapred.OutputFormat, Writable> of) { - super(of); - } - - static synchronized String getOutputName(int partition) { - return "part-" + NUMBER_FORMAT.format(partition); - } - - /** - * Get the record writer for the job. Uses the storagehandler's OutputFormat - * to get the record writer. - * @param context the information about the current task. - * @return a RecordWriter to write the output for the job. 
- * @throws IOException - */ - @Override - public RecordWriter, HCatRecord> - getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException { - String name = getOutputName(context.getTaskAttemptID().getTaskID().getId()); - return new DefaultRecordWriterContainer(context, - getBaseOutputFormat().getRecordWriter(null, new JobConf(context.getConfiguration()), name, InternalUtil.createReporter(context))); - } - - - /** - * Get the output committer for this output format. This is responsible - * for ensuring the output is committed correctly. - * @param context the task context - * @return an output committer - * @throws IOException - * @throws InterruptedException - */ - @Override - public OutputCommitter getOutputCommitter(TaskAttemptContext context) - throws IOException, InterruptedException { - return new DefaultOutputCommitterContainer(context, new JobConf(context.getConfiguration()).getOutputCommitter()); - } - - /** - * Check for validity of the output-specification for the job. - * @param context information about the job - * @throws IOException when output should not be attempted - */ - @Override - public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException { - org.apache.hadoop.mapred.OutputFormat, ? super Writable> outputFormat = getBaseOutputFormat(); - JobConf jobConf = new JobConf(context.getConfiguration()); - outputFormat.checkOutputSpecs(null, jobConf); - HCatUtil.copyConf(jobConf, context.getConfiguration()); - } - -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/DefaultRecordWriterContainer.java hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/DefaultRecordWriterContainer.java deleted file mode 100644 index fe7f72a..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/DefaultRecordWriterContainer.java +++ /dev/null @@ -1,82 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.mapreduce; - -import java.io.IOException; - -import org.apache.hadoop.hive.serde2.SerDe; -import org.apache.hadoop.hive.serde2.SerDeException; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.hadoop.util.ReflectionUtils; -import org.apache.hcatalog.common.HCatUtil; -import org.apache.hcatalog.data.HCatRecord; - -/** - * Part of the DefaultOutput*Container classes - * See {@link DefaultOutputFormatContainer} for more information - */ -class DefaultRecordWriterContainer extends RecordWriterContainer { - - private final HCatStorageHandler storageHandler; - private final SerDe serDe; - private final OutputJobInfo jobInfo; - private final ObjectInspector hcatRecordOI; - - /** - * @param context current JobContext - * @param baseRecordWriter RecordWriter to contain - * @throws IOException - * @throws InterruptedException - */ - public DefaultRecordWriterContainer(TaskAttemptContext context, - org.apache.hadoop.mapred.RecordWriter, ? 
super Writable> baseRecordWriter) throws IOException, InterruptedException { - super(context, baseRecordWriter); - jobInfo = HCatOutputFormat.getJobInfo(context); - storageHandler = HCatUtil.getStorageHandler(context.getConfiguration(), jobInfo.getTableInfo().getStorerInfo()); - HCatOutputFormat.configureOutputStorageHandler(context); - serDe = ReflectionUtils.newInstance(storageHandler.getSerDeClass(), context.getConfiguration()); - hcatRecordOI = InternalUtil.createStructObjectInspector(jobInfo.getOutputSchema()); - try { - InternalUtil.initializeOutputSerDe(serDe, context.getConfiguration(), jobInfo); - } catch (SerDeException e) { - throw new IOException("Failed to initialize SerDe", e); - } - } - - @Override - public void close(TaskAttemptContext context) throws IOException, - InterruptedException { - getBaseRecordWriter().close(InternalUtil.createReporter(context)); - } - - @Override - public void write(WritableComparable key, HCatRecord value) throws IOException, - InterruptedException { - try { - getBaseRecordWriter().write(null, serDe.serialize(value.getAll(), hcatRecordOI)); - } catch (SerDeException e) { - throw new IOException("Failed to serialize object", e); - } - } - -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/FileOutputCommitterContainer.java hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/FileOutputCommitterContainer.java deleted file mode 100644 index 47a1d5b..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/FileOutputCommitterContainer.java +++ /dev/null @@ -1,750 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.mapreduce; - -import java.io.IOException; -import java.net.URI; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.permission.FsPermission; -import org.apache.hadoop.hive.common.FileUtils; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; -import org.apache.hadoop.hive.metastore.Warehouse; -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.metastore.api.InvalidOperationException; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.api.Partition; -import org.apache.hadoop.hive.metastore.api.StorageDescriptor; -import org.apache.hadoop.hive.ql.metadata.Table; -import org.apache.hadoop.hive.shims.ShimLoader; -import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapreduce.JobContext; -import org.apache.hadoop.mapreduce.JobStatus.State; -import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.hcatalog.common.ErrorType; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.common.HCatException; -import org.apache.hcatalog.common.HCatUtil; -import org.apache.hcatalog.data.schema.HCatFieldSchema; -import 
org.apache.hcatalog.data.schema.HCatSchema; -import org.apache.hcatalog.data.schema.HCatSchemaUtils; -import org.apache.hcatalog.har.HarOutputCommitterPostProcessor; -import org.apache.thrift.TException; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Part of the FileOutput*Container classes - * See {@link FileOutputFormatContainer} for more information - */ -class FileOutputCommitterContainer extends OutputCommitterContainer { - - private static final String TEMP_DIR_NAME = "_temporary"; - private static final String LOGS_DIR_NAME = "_logs"; - - private static final Logger LOG = LoggerFactory.getLogger(FileOutputCommitterContainer.class); - private final boolean dynamicPartitioningUsed; - private boolean partitionsDiscovered; - - private Map> partitionsDiscoveredByPath; - private Map contextDiscoveredByPath; - private final HCatStorageHandler cachedStorageHandler; - - HarOutputCommitterPostProcessor harProcessor = new HarOutputCommitterPostProcessor(); - - private String ptnRootLocation = null; - - private OutputJobInfo jobInfo = null; - - /** - * @param context current JobContext - * @param baseCommitter OutputCommitter to contain - * @throws IOException - */ - public FileOutputCommitterContainer(JobContext context, - org.apache.hadoop.mapred.OutputCommitter baseCommitter) throws IOException { - super(context, baseCommitter); - jobInfo = HCatOutputFormat.getJobInfo(context); - dynamicPartitioningUsed = jobInfo.isDynamicPartitioningUsed(); - - this.partitionsDiscovered = !dynamicPartitioningUsed; - cachedStorageHandler = HCatUtil.getStorageHandler(context.getConfiguration(), jobInfo.getTableInfo().getStorerInfo()); - } - - @Override - public void abortTask(TaskAttemptContext context) throws IOException { - if (!dynamicPartitioningUsed) { - getBaseOutputCommitter().abortTask(HCatMapRedUtil.createTaskAttemptContext(context)); - } - } - - @Override - public void commitTask(TaskAttemptContext context) throws IOException { - if 
(!dynamicPartitioningUsed) { - //See HCATALOG-499 - FileOutputFormatContainer.setWorkOutputPath(context); - getBaseOutputCommitter().commitTask(HCatMapRedUtil.createTaskAttemptContext(context)); - } - } - - @Override - public boolean needsTaskCommit(TaskAttemptContext context) throws IOException { - if (!dynamicPartitioningUsed) { - return getBaseOutputCommitter().needsTaskCommit(HCatMapRedUtil.createTaskAttemptContext(context)); - } else { - // called explicitly through FileRecordWriterContainer.close() if dynamic - return false by default - return false; - } - } - - @Override - public void setupJob(JobContext context) throws IOException { - if (getBaseOutputCommitter() != null && !dynamicPartitioningUsed) { - getBaseOutputCommitter().setupJob(HCatMapRedUtil.createJobContext(context)); - } - // in dynamic usecase, called through FileRecordWriterContainer - } - - @Override - public void setupTask(TaskAttemptContext context) throws IOException { - if (!dynamicPartitioningUsed) { - getBaseOutputCommitter().setupTask(HCatMapRedUtil.createTaskAttemptContext(context)); - } - } - - @Override - public void abortJob(JobContext jobContext, State state) throws IOException { - try { - if (dynamicPartitioningUsed) { - discoverPartitions(jobContext); - } - org.apache.hadoop.mapred.JobContext mapRedJobContext = HCatMapRedUtil - .createJobContext(jobContext); - if (getBaseOutputCommitter() != null && !dynamicPartitioningUsed) { - getBaseOutputCommitter().abortJob(mapRedJobContext, state); - } else if (dynamicPartitioningUsed) { - for (JobContext currContext : contextDiscoveredByPath.values()) { - try { - new JobConf(currContext.getConfiguration()) - .getOutputCommitter().abortJob(currContext, - state); - } catch (Exception e) { - throw new IOException(e); - } - } - } - Path src; - OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(jobContext); - if (dynamicPartitioningUsed) { - src = new Path(getPartitionRootLocation(jobInfo.getLocation(), jobInfo.getTableInfo().getTable() - 
.getPartitionKeysSize())); - } else { - src = new Path(jobInfo.getLocation()); - } - FileSystem fs = src.getFileSystem(jobContext.getConfiguration()); - // Note fs.delete will fail on Windows. The reason is in OutputCommitter, - // Hadoop is still writing to _logs/history. On Linux, OS don't care file is still - // open and remove the directory anyway, but on Windows, OS refuse to remove a - // directory containing open files. So on Windows, we will leave output directory - // behind when job fail. User needs to remove the output directory manually - LOG.info("Job failed. Try cleaning up temporary directory [{}].", src); - fs.delete(src, true); - } finally { - cancelDelegationTokens(jobContext); - } - } - - public static final String SUCCEEDED_FILE_NAME = "_SUCCESS"; - static final String SUCCESSFUL_JOB_OUTPUT_DIR_MARKER = - "mapreduce.fileoutputcommitter.marksuccessfuljobs"; - - private static boolean getOutputDirMarking(Configuration conf) { - return conf.getBoolean(SUCCESSFUL_JOB_OUTPUT_DIR_MARKER, - false); - } - - @Override - public void commitJob(JobContext jobContext) throws IOException { - try { - if (dynamicPartitioningUsed) { - discoverPartitions(jobContext); - // Commit each partition so it gets moved out of the job work - // dir - for (JobContext context : contextDiscoveredByPath.values()) { - new JobConf(context.getConfiguration()) - .getOutputCommitter().commitJob(context); - } - } - if (getBaseOutputCommitter() != null && !dynamicPartitioningUsed) { - getBaseOutputCommitter().commitJob( - HCatMapRedUtil.createJobContext(jobContext)); - } - registerPartitions(jobContext); - // create _SUCCESS FILE if so requested. 
- OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(jobContext); - if (getOutputDirMarking(jobContext.getConfiguration())) { - Path outputPath = new Path(jobInfo.getLocation()); - FileSystem fileSys = outputPath.getFileSystem(jobContext - .getConfiguration()); - // create a file in the folder to mark it - if (fileSys.exists(outputPath)) { - Path filePath = new Path(outputPath, - SUCCEEDED_FILE_NAME); - if (!fileSys.exists(filePath)) { // may have been - // created by - // baseCommitter.commitJob() - fileSys.create(filePath).close(); - } - } - } - } finally { - cancelDelegationTokens(jobContext); - } - } - - @Override - public void cleanupJob(JobContext context) throws IOException { - throw new IOException("The method cleanupJob is deprecated and should not be called."); - } - - private String getPartitionRootLocation(String ptnLocn, int numPtnKeys) { - if (ptnRootLocation == null) { - // we only need to calculate it once, it'll be the same for other partitions in this job. - Path ptnRoot = new Path(ptnLocn); - for (int i = 0; i < numPtnKeys; i++) { -// LOG.info("Getting parent of "+ptnRoot.getName()); - ptnRoot = ptnRoot.getParent(); - } - ptnRootLocation = ptnRoot.toString(); - } -// LOG.info("Returning final parent : "+ptnRootLocation); - return ptnRootLocation; - } - - /** - * Generate partition metadata object to be used to add to metadata. - * @param context The job context. - * @param jobInfo The OutputJobInfo. 
- * @param partLocnRoot The table-equivalent location root of the partition - * (temporary dir if dynamic partition, table dir if static) - * @param partKVs The keyvalue pairs that form the partition - * @param outputSchema The output schema for the partition - * @param params The parameters to store inside the partition - * @param table The Table metadata object under which this Partition will reside - * @param fs FileSystem object to operate on the underlying filesystem - * @param grpName Group name that owns the table dir - * @param perms FsPermission that's the default permission of the table dir. - * @return Constructed Partition metadata object - * @throws java.io.IOException - */ - - private Partition constructPartition( - JobContext context, OutputJobInfo jobInfo, - String partLocnRoot, Map partKVs, - HCatSchema outputSchema, Map params, - Table table, FileSystem fs, - String grpName, FsPermission perms) throws IOException { - - Partition partition = new Partition(); - partition.setDbName(table.getDbName()); - partition.setTableName(table.getTableName()); - partition.setSd(new StorageDescriptor(table.getTTable().getSd())); - - List fields = new ArrayList(); - for (HCatFieldSchema fieldSchema : outputSchema.getFields()) { - fields.add(HCatSchemaUtils.getFieldSchema(fieldSchema)); - } - - partition.getSd().setCols(fields); - - partition.setValues(FileOutputFormatContainer.getPartitionValueList(table, partKVs)); - - partition.setParameters(params); - - // Sets permissions and group name on partition dirs and files. 
- - Path partPath; - if (Boolean.valueOf((String)table.getProperty("EXTERNAL")) - && jobInfo.getLocation() != null && jobInfo.getLocation().length() > 0) { - // honor external table that specifies the location - partPath = new Path(jobInfo.getLocation()); - } else { - partPath = new Path(partLocnRoot); - int i = 0; - for (FieldSchema partKey : table.getPartitionKeys()) { - if (i++ != 0) { - applyGroupAndPerms(fs, partPath, perms, grpName, false); - } - partPath = constructPartialPartPath(partPath, partKey.getName().toLowerCase(), partKVs); - } - } - - // Apply the group and permissions to the leaf partition and files. - // Need not bother in case of HDFS as permission is taken care of by setting UMask - if (!ShimLoader.getHadoopShims().getHCatShim().isFileInHDFS(fs, partPath)) { - applyGroupAndPerms(fs, partPath, perms, grpName, true); - } - - // Set the location in the StorageDescriptor - if (dynamicPartitioningUsed) { - String dynamicPartitionDestination = getFinalDynamicPartitionDestination(table, partKVs); - if (harProcessor.isEnabled()) { - harProcessor.exec(context, partition, partPath); - partition.getSd().setLocation( - harProcessor.getProcessedLocation(new Path(dynamicPartitionDestination))); - } else { - partition.getSd().setLocation(dynamicPartitionDestination); - } - } else { - partition.getSd().setLocation(partPath.toString()); - } - return partition; - } - - private void applyGroupAndPerms(FileSystem fs, Path dir, FsPermission permission, - String group, boolean recursive) - throws IOException { - fs.setPermission(dir, permission); - if (recursive) { - for (FileStatus fileStatus : fs.listStatus(dir)) { - if (fileStatus.isDir()) { - applyGroupAndPerms(fs, fileStatus.getPath(), permission, group, true); - } else { - fs.setPermission(fileStatus.getPath(), permission); - } - } - } - } - - private String getFinalDynamicPartitionDestination(Table table, Map partKVs) { - // 
file:///tmp/hcat_junit_warehouse/employee/_DYN0.7770480401313761/emp_country=IN/emp_state=KA -> - // file:///tmp/hcat_junit_warehouse/employee/emp_country=IN/emp_state=KA - Path partPath = new Path(table.getTTable().getSd().getLocation()); - for (FieldSchema partKey : table.getPartitionKeys()) { - partPath = constructPartialPartPath(partPath, partKey.getName().toLowerCase(), partKVs); - } - return partPath.toString(); - } - - private Map getStorerParameterMap(StorerInfo storer) { - Map params = new HashMap(); - - //Copy table level hcat.* keys to the partition - for (Entry entry : storer.getProperties().entrySet()) { - params.put(entry.getKey().toString(), entry.getValue().toString()); - } - return params; - } - - private Path constructPartialPartPath(Path partialPath, String partKey, Map partKVs) { - - StringBuilder sb = new StringBuilder(FileUtils.escapePathName(partKey)); - sb.append("="); - sb.append(FileUtils.escapePathName(partKVs.get(partKey))); - return new Path(partialPath, sb.toString()); - } - - /** - * Update table schema, adding new columns as added for the partition. - * @param client the client - * @param table the table - * @param partitionSchema the schema of the partition - * @throws java.io.IOException Signals that an I/O exception has occurred. 
- * @throws org.apache.hadoop.hive.metastore.api.InvalidOperationException the invalid operation exception - * @throws org.apache.hadoop.hive.metastore.api.MetaException the meta exception - * @throws org.apache.thrift.TException the t exception - */ - private void updateTableSchema(HiveMetaStoreClient client, Table table, - HCatSchema partitionSchema) throws IOException, InvalidOperationException, MetaException, TException { - - - List newColumns = HCatUtil.validatePartitionSchema(table, partitionSchema); - - if (newColumns.size() != 0) { - List tableColumns = new ArrayList(table.getTTable().getSd().getCols()); - tableColumns.addAll(newColumns); - - //Update table schema to add the newly added columns - table.getTTable().getSd().setCols(tableColumns); - client.alter_table(table.getDbName(), table.getTableName(), table.getTTable()); - } - } - - /** - * Move all of the files from the temp directory to the final location - * @param fs the output file system - * @param file the file to move - * @param srcDir the source directory - * @param destDir the target directory - * @param dryRun - a flag that simply tests if this move would succeed or not based - * on whether other files exist where we're trying to copy - * @throws java.io.IOException - */ - private void moveTaskOutputs(FileSystem fs, - Path file, - Path srcDir, - Path destDir, final boolean dryRun) throws IOException { - - if (file.getName().equals(TEMP_DIR_NAME) || file.getName().equals(LOGS_DIR_NAME) || file.getName().equals(SUCCEEDED_FILE_NAME)) { - return; - } - final Path finalOutputPath = getFinalPath(file, srcDir, destDir); - if (fs.isFile(file)) { - if (dryRun){ - if(LOG.isDebugEnabled()) { - LOG.debug("Testing if moving file: [" + file + "] to [" - + finalOutputPath + "] would cause a problem"); - } - if (fs.exists(finalOutputPath)) { - throw new HCatException(ErrorType.ERROR_MOVE_FAILED, "Data already exists in " + finalOutputPath - + ", duplicate publish not possible."); - } - } else { - 
if(LOG.isDebugEnabled()) { - LOG.debug("Moving file: [ " + file + "] to [" + finalOutputPath + "]"); - } - // Make sure the parent directory exists. It is not an error - // to recreate an existing directory - fs.mkdirs(finalOutputPath.getParent()); - if (!fs.rename(file, finalOutputPath)) { - if (!fs.delete(finalOutputPath, true)) { - throw new HCatException(ErrorType.ERROR_MOVE_FAILED, "Failed to delete existing path " + finalOutputPath); - } - if (!fs.rename(file, finalOutputPath)) { - throw new HCatException(ErrorType.ERROR_MOVE_FAILED, "Failed to move output to " + finalOutputPath); - } - } - } - } else if(fs.getFileStatus(file).isDir()) { - FileStatus[] children = fs.listStatus(file); - FileStatus firstChild = null; - if (children != null) { - int index=0; - while (index < children.length) { - if (!children[index].getPath().getName().equals(TEMP_DIR_NAME) && !children[index].getPath().getName().equals(LOGS_DIR_NAME) && !children[index].getPath().getName().equals(SUCCEEDED_FILE_NAME)) { - firstChild = children[index]; - break; - } - index++; - } - } - if(firstChild!=null && firstChild.isDir()) { - // If the first child is directory, then rest would be directory too according to HCatalog dir structure - // recurse in that case - for (FileStatus child : children) { - moveTaskOutputs(fs, child.getPath(), srcDir, destDir, dryRun); - } - } else { - - if (!dryRun) { - if (dynamicPartitioningUsed) { - // Optimization: if the first child is file, we have reached the leaf directory, move the parent directory itself - // instead of moving each file under the directory. 
See HCATALOG-538 - - final Path parentDir = finalOutputPath.getParent(); - // Create the directory - Path placeholder = new Path(parentDir, "_placeholder"); - if (fs.mkdirs(parentDir)) { - // It is weired but we need a placeholder, - // otherwise rename cannot move file to the right place - fs.create(placeholder).close(); - } - if (LOG.isDebugEnabled()) { - LOG.debug("Moving directory: " + file + " to " + parentDir); - } - if (!fs.rename(file, parentDir)) { - final String msg = "Failed to move file: " + file + " to " + parentDir; - LOG.error(msg); - throw new HCatException(ErrorType.ERROR_MOVE_FAILED, msg); - } - fs.delete(placeholder, false); - } else { - // In case of no partition we have to move each file - for (FileStatus child : children) { - moveTaskOutputs(fs, child.getPath(), srcDir, destDir, dryRun); - } - } - } else { - if(fs.exists(finalOutputPath)) { - throw new HCatException(ErrorType.ERROR_MOVE_FAILED, "Data already exists in " + finalOutputPath - + ", duplicate publish not possible."); - } - } - } - } else { - // Should never happen - final String msg = "Unknown file type being asked to be moved, erroring out"; - throw new HCatException(ErrorType.ERROR_MOVE_FAILED, msg); - } - } - - /** - * Find the final name of a given output file, given the output directory - * and the work directory. 
- * @param file the file to move - * @param src the source directory - * @param dest the target directory - * @return the final path for the specific output file - * @throws java.io.IOException - */ - private Path getFinalPath(Path file, Path src, - Path dest) throws IOException { - URI taskOutputUri = file.toUri(); - URI relativePath = src.toUri().relativize(taskOutputUri); - if (taskOutputUri == relativePath) { - throw new HCatException(ErrorType.ERROR_MOVE_FAILED, "Can not get the relative path: base = " + - src + " child = " + file); - } - if (relativePath.getPath().length() > 0) { - return new Path(dest, relativePath.getPath()); - } else { - return dest; - } - } - - /** - * Run to discover dynamic partitions available - */ - private void discoverPartitions(JobContext context) throws IOException { - if (!partitionsDiscovered) { - // LOG.info("discover ptns called"); - OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(context); - - harProcessor.setEnabled(jobInfo.getHarRequested()); - - List dynamicPartCols = jobInfo.getPosOfDynPartCols(); - int maxDynamicPartitions = jobInfo.getMaxDynamicPartitions(); - - Path loadPath = new Path(jobInfo.getLocation()); - FileSystem fs = loadPath.getFileSystem(context.getConfiguration()); - - // construct a path pattern (e.g., /*/*) to find all dynamically generated paths - String dynPathSpec = loadPath.toUri().getPath(); - dynPathSpec = dynPathSpec.replaceAll("__HIVE_DEFAULT_PARTITION__", "*"); - - // LOG.info("Searching for "+dynPathSpec); - Path pathPattern = new Path(dynPathSpec); - FileStatus[] status = fs.globStatus(pathPattern); - - partitionsDiscoveredByPath = new LinkedHashMap>(); - contextDiscoveredByPath = new LinkedHashMap(); - - - if (status.length == 0) { - // LOG.warn("No partition found genereated by dynamic partitioning in [" - // +loadPath+"] with depth["+jobInfo.getTable().getPartitionKeysSize() - // +"], dynSpec["+dynPathSpec+"]"); - } else { - if ((maxDynamicPartitions != -1) && (status.length > 
maxDynamicPartitions)) { - this.partitionsDiscovered = true; - throw new HCatException(ErrorType.ERROR_TOO_MANY_DYNAMIC_PTNS, - "Number of dynamic partitions being created " - + "exceeds configured max allowable partitions[" - + maxDynamicPartitions - + "], increase parameter [" - + HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTS.varname - + "] if needed."); - } - - for (FileStatus st : status) { - LinkedHashMap fullPartSpec = new LinkedHashMap(); - Warehouse.makeSpecFromName(fullPartSpec, st.getPath()); - partitionsDiscoveredByPath.put(st.getPath().toString(), fullPartSpec); - JobConf jobConf = (JobConf)context.getConfiguration(); - JobContext currContext = HCatMapRedUtil.createJobContext( - jobConf, - context.getJobID(), - InternalUtil.createReporter(HCatMapRedUtil.createTaskAttemptContext(jobConf, - ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptID()))); - HCatOutputFormat.configureOutputStorageHandler(currContext, jobInfo, fullPartSpec); - contextDiscoveredByPath.put(st.getPath().toString(), currContext); - } - } - - // for (Entry> spec : partitionsDiscoveredByPath.entrySet()){ - // LOG.info("Partition "+ spec.getKey()); - // for (Entry e : spec.getValue().entrySet()){ - // LOG.info(e.getKey() + "=>" +e.getValue()); - // } - // } - - this.partitionsDiscovered = true; - } - } - - private void registerPartitions(JobContext context) throws IOException{ - if (dynamicPartitioningUsed){ - discoverPartitions(context); - } - OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(context); - Configuration conf = context.getConfiguration(); - Table table = new Table(jobInfo.getTableInfo().getTable()); - Path tblPath = new Path(table.getTTable().getSd().getLocation()); - FileSystem fs = tblPath.getFileSystem(conf); - - if( table.getPartitionKeys().size() == 0 ) { - //Move data from temp directory the actual table directory - //No metastore operation required. 
- Path src = new Path(jobInfo.getLocation()); - moveTaskOutputs(fs, src, src, tblPath, false); - fs.delete(src, true); - return; - } - - HiveMetaStoreClient client = null; - HCatTableInfo tableInfo = jobInfo.getTableInfo(); - List partitionsAdded = new ArrayList(); - try { - HiveConf hiveConf = HCatUtil.getHiveConf(conf); - client = HCatUtil.getHiveClient(hiveConf); - StorerInfo storer = InternalUtil.extractStorerInfo(table.getTTable().getSd(),table.getParameters()); - - FileStatus tblStat = fs.getFileStatus(tblPath); - String grpName = tblStat.getGroup(); - FsPermission perms = tblStat.getPermission(); - - List partitionsToAdd = new ArrayList(); - if (!dynamicPartitioningUsed){ - partitionsToAdd.add( - constructPartition( - context,jobInfo, - tblPath.toString(), jobInfo.getPartitionValues() - ,jobInfo.getOutputSchema(), getStorerParameterMap(storer) - ,table, fs - ,grpName,perms)); - }else{ - for (Entry> entry : partitionsDiscoveredByPath.entrySet()){ - partitionsToAdd.add( - constructPartition( - context,jobInfo, - getPartitionRootLocation(entry.getKey(),entry.getValue().size()), entry.getValue() - ,jobInfo.getOutputSchema(), getStorerParameterMap(storer) - ,table, fs - ,grpName,perms)); - } - } - - ArrayList> ptnInfos = new ArrayList>(); - for(Partition ptn : partitionsToAdd){ - ptnInfos.add(InternalUtil.createPtnKeyValueMap(new Table(tableInfo.getTable()), ptn)); - } - - //Publish the new partition(s) - if (dynamicPartitioningUsed && harProcessor.isEnabled() && (!partitionsToAdd.isEmpty())){ - - Path src = new Path(ptnRootLocation); - // check here for each dir we're copying out, to see if it - // already exists, error out if so - moveTaskOutputs(fs, src, src, tblPath, true); - moveTaskOutputs(fs, src, src, tblPath, false); - fs.delete(src, true); - try { - updateTableSchema(client, table, jobInfo.getOutputSchema()); - LOG.info("HAR is being used. 
The table {} has new partitions {}.", table.getTableName(), ptnInfos); - client.add_partitions(partitionsToAdd); - partitionsAdded = partitionsToAdd; - } catch (Exception e){ - // There was an error adding partitions : rollback fs copy and rethrow - for (Partition p : partitionsToAdd){ - Path ptnPath = new Path(harProcessor.getParentFSPath(new Path(p.getSd().getLocation()))); - if (fs.exists(ptnPath)){ - fs.delete(ptnPath,true); - } - } - throw e; - } - - }else{ - // no harProcessor, regular operation - updateTableSchema(client, table, jobInfo.getOutputSchema()); - LOG.info("HAR not is not being used. The table {} has new partitions {}.", table.getTableName(), ptnInfos); - if (dynamicPartitioningUsed && (partitionsToAdd.size()>0)){ - Path src = new Path(ptnRootLocation); - moveTaskOutputs(fs, src, src, tblPath, true); - moveTaskOutputs(fs, src, src, tblPath, false); - fs.delete(src, true); - } - client.add_partitions(partitionsToAdd); - partitionsAdded = partitionsToAdd; - } - } catch (Exception e) { - if (partitionsAdded.size() > 0) { - try { - // baseCommitter.cleanupJob failed, try to clean up the - // metastore - for (Partition p : partitionsAdded) { - client.dropPartition(tableInfo.getDatabaseName(), - tableInfo.getTableName(), p.getValues()); - } - } catch (Exception te) { - // Keep cause as the original exception - throw new HCatException( - ErrorType.ERROR_PUBLISHING_PARTITION, e); - } - } - if (e instanceof HCatException) { - throw (HCatException) e; - } else { - throw new HCatException(ErrorType.ERROR_PUBLISHING_PARTITION, e); - } - } finally { - HCatUtil.closeHiveClientQuietly(client); - } - } - - private void cancelDelegationTokens(JobContext context) throws IOException{ - LOG.info("Cancelling deletgation token for the job."); - HiveMetaStoreClient client = null; - try { - HiveConf hiveConf = HCatUtil - .getHiveConf(context.getConfiguration()); - client = HCatUtil.getHiveClient(hiveConf); - // cancel the deleg. 
tokens that were acquired for this job now that - // we are done - we should cancel if the tokens were acquired by - // HCatOutputFormat and not if they were supplied by Oozie. - // In the latter case the HCAT_KEY_TOKEN_SIGNATURE property in - // the conf will not be set - String tokenStrForm = client.getTokenStrForm(); - if (tokenStrForm != null - && context.getConfiguration().get( - HCatConstants.HCAT_KEY_TOKEN_SIGNATURE) != null) { - client.cancelDelegationToken(tokenStrForm); - } - } catch (MetaException e) { - LOG.warn("MetaException while cancelling delegation token.", e); - } catch (TException e) { - LOG.warn("TException while cancelling delegation token.", e); - } finally { - HCatUtil.closeHiveClientQuietly(client); - } - } - - -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/FileOutputFormatContainer.java hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/FileOutputFormatContainer.java deleted file mode 100644 index 4e86a64..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/FileOutputFormatContainer.java +++ /dev/null @@ -1,252 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.mapreduce; - -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.PathFilter; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; -import org.apache.hadoop.hive.ql.metadata.Table; -import org.apache.hadoop.hive.serde2.SerDe; -import org.apache.hadoop.io.NullWritable; -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.mapred.FileOutputFormat; -import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapreduce.JobContext; -import org.apache.hadoop.mapreduce.OutputCommitter; -import org.apache.hadoop.mapreduce.RecordWriter; -import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter; -import org.apache.hadoop.util.ReflectionUtils; -import org.apache.hcatalog.common.ErrorType; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.common.HCatException; -import org.apache.hcatalog.common.HCatUtil; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.thrift.TException; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; - -/** - * File-based storage (ie RCFile, Text, etc) implementation of OutputFormatContainer. - * This implementation supports the following HCatalog features: partitioning, dynamic partitioning, Hadoop Archiving, etc. 
- */ -class FileOutputFormatContainer extends OutputFormatContainer { - - private static final PathFilter hiddenFileFilter = new PathFilter() { - public boolean accept(Path p) { - String name = p.getName(); - return !name.startsWith("_") && !name.startsWith("."); - } - }; - - /** - * @param of base OutputFormat to contain - */ - public FileOutputFormatContainer(org.apache.hadoop.mapred.OutputFormat, ? super Writable> of) { - super(of); - } - - @Override - public RecordWriter, HCatRecord> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException { - //this needs to be manually set, under normal circumstances MR Task does this - setWorkOutputPath(context); - - //Configure the output key and value classes. - // This is required for writing null as key for file based tables. - context.getConfiguration().set("mapred.output.key.class", - NullWritable.class.getName()); - String jobInfoString = context.getConfiguration().get( - HCatConstants.HCAT_KEY_OUTPUT_INFO); - OutputJobInfo jobInfo = (OutputJobInfo) HCatUtil - .deserialize(jobInfoString); - StorerInfo storeInfo = jobInfo.getTableInfo().getStorerInfo(); - HCatStorageHandler storageHandler = HCatUtil.getStorageHandler( - context.getConfiguration(), storeInfo); - Class serde = storageHandler.getSerDeClass(); - SerDe sd = (SerDe) ReflectionUtils.newInstance(serde, - context.getConfiguration()); - context.getConfiguration().set("mapred.output.value.class", - sd.getSerializedClass().getName()); - - RecordWriter, HCatRecord> rw; - if (HCatBaseOutputFormat.getJobInfo(context).isDynamicPartitioningUsed()){ - // When Dynamic partitioning is used, the RecordWriter instance initialized here isn't used. Can use null. - // (That's because records can't be written until the values of the dynamic partitions are deduced. - // By that time, a new local instance of RecordWriter, with the correct output-path, will be constructed.) 
- rw = new FileRecordWriterContainer((org.apache.hadoop.mapred.RecordWriter)null,context); - } else { - Path parentDir = new Path(context.getConfiguration().get("mapred.work.output.dir")); - Path childPath = new Path(parentDir,FileOutputFormat.getUniqueName(new JobConf(context.getConfiguration()), "part")); - - rw = new FileRecordWriterContainer( - getBaseOutputFormat().getRecordWriter( - parentDir.getFileSystem(context.getConfiguration()), - new JobConf(context.getConfiguration()), - childPath.toString(), - InternalUtil.createReporter(context)), - context); - } - return rw; - } - - @Override - public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException { - OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(context); - HiveMetaStoreClient client = null; - try { - HiveConf hiveConf = HCatUtil.getHiveConf(context.getConfiguration()); - client = HCatUtil.getHiveClient(hiveConf); - handleDuplicatePublish(context, - jobInfo, - client, - new Table(jobInfo.getTableInfo().getTable())); - } catch (MetaException e) { - throw new IOException(e); - } catch (TException e) { - throw new IOException(e); - } finally { - HCatUtil.closeHiveClientQuietly(client); - } - - if (!jobInfo.isDynamicPartitioningUsed()) { - JobConf jobConf = new JobConf(context.getConfiguration()); - getBaseOutputFormat().checkOutputSpecs(null, jobConf); - //checkoutputspecs might've set some properties we need to have context reflect that - HCatUtil.copyConf(jobConf, context.getConfiguration()); - } - } - - @Override - public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException, InterruptedException { - //this needs to be manually set, under normal circumstances MR Task does this - setWorkOutputPath(context); - return new FileOutputCommitterContainer(context, - HCatBaseOutputFormat.getJobInfo(context).isDynamicPartitioningUsed() ? 
- null : - new JobConf(context.getConfiguration()).getOutputCommitter()); - } - - /** - * Handles duplicate publish of partition. Fails if partition already exists. - * For non partitioned tables, fails if files are present in table directory. - * For dynamic partitioned publish, does nothing - check would need to be done at recordwriter time - * @param context the job - * @param outputInfo the output info - * @param client the metastore client - * @param table the table being written to - * @throws IOException - * @throws org.apache.hadoop.hive.metastore.api.MetaException - * @throws org.apache.thrift.TException - */ - private static void handleDuplicatePublish(JobContext context, OutputJobInfo outputInfo, - HiveMetaStoreClient client, Table table) throws IOException, MetaException, TException, NoSuchObjectException { - - /* - * For fully specified ptn, follow strict checks for existence of partitions in metadata - * For unpartitioned tables, follow filechecks - * For partially specified tables: - * This would then need filechecks at the start of a ptn write, - * Doing metadata checks can get potentially very expensive (fat conf) if - * there are a large number of partitions that match the partial specifications - */ - - if (table.getPartitionKeys().size() > 0) { - if (!outputInfo.isDynamicPartitioningUsed()) { - List partitionValues = getPartitionValueList( - table, outputInfo.getPartitionValues()); - // fully-specified partition - List currentParts = client.listPartitionNames(outputInfo.getDatabaseName(), - outputInfo.getTableName(), partitionValues, (short) 1); - - if (currentParts.size() > 0) { - throw new HCatException(ErrorType.ERROR_DUPLICATE_PARTITION); - } - } - } else { - List partitionValues = getPartitionValueList( - table, outputInfo.getPartitionValues()); - // non-partitioned table - - Path tablePath = new Path(table.getTTable().getSd().getLocation()); - FileSystem fs = tablePath.getFileSystem(context.getConfiguration()); - - if 
(fs.exists(tablePath)) { - FileStatus[] status = fs.globStatus(new Path(tablePath, "*"), hiddenFileFilter); - - if (status.length > 0) { - throw new HCatException(ErrorType.ERROR_NON_EMPTY_TABLE, - table.getDbName() + "." + table.getTableName()); - } - } - } - } - - /** - * Convert the partition value map to a value list in the partition key order. - * @param table the table being written to - * @param valueMap the partition value map - * @return the partition value list - * @throws java.io.IOException - */ - static List getPartitionValueList(Table table, Map valueMap) throws IOException { - - if (valueMap.size() != table.getPartitionKeys().size()) { - throw new HCatException(ErrorType.ERROR_INVALID_PARTITION_VALUES, - "Table " - + table.getTableName() + " has " + - table.getPartitionKeys().size() + " partition keys, got " + - valueMap.size()); - } - - List values = new ArrayList(); - - for (FieldSchema schema : table.getPartitionKeys()) { - String value = valueMap.get(schema.getName().toLowerCase()); - - if (value == null) { - throw new HCatException(ErrorType.ERROR_MISSING_PARTITION_KEY, - "Key " + schema.getName() + " of table " + table.getTableName()); - } - - values.add(value); - } - - return values; - } - - static void setWorkOutputPath(TaskAttemptContext context) throws IOException { - String outputPath = context.getConfiguration().get("mapred.output.dir"); - //we need to do this to get the task path and set it for mapred implementation - //since it can't be done automatically because of mapreduce->mapred abstraction - if (outputPath != null) - context.getConfiguration().set("mapred.work.output.dir", - new FileOutputCommitter(new Path(outputPath), context).getWorkPath().toString()); - } -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/FileRecordWriterContainer.java hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/FileRecordWriterContainer.java deleted file mode 100644 index baab03f..0000000 --- 
hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/FileRecordWriterContainer.java +++ /dev/null @@ -1,266 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.mapreduce; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.serde2.SerDe; -import org.apache.hadoop.hive.serde2.SerDeException; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.io.NullWritable; -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.mapred.Reporter; -import org.apache.hadoop.mapreduce.JobContext; -import org.apache.hadoop.mapreduce.OutputCommitter; -import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter; -import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; -import org.apache.hadoop.util.ReflectionUtils; -import org.apache.hcatalog.common.ErrorType; -import org.apache.hcatalog.common.HCatException; -import 
org.apache.hcatalog.common.HCatUtil; -import org.apache.hcatalog.data.HCatRecord; - -/** - * Part of the FileOutput*Container classes - * See {@link FileOutputFormatContainer} for more information - */ -class FileRecordWriterContainer extends RecordWriterContainer { - - private final HCatStorageHandler storageHandler; - private final SerDe serDe; - private final ObjectInspector objectInspector; - - private boolean dynamicPartitioningUsed = false; - - private final Map, ? super Writable>> baseDynamicWriters; - private final Map baseDynamicSerDe; - private final Map baseDynamicCommitters; - private final Map dynamicContexts; - private final Map dynamicObjectInspectors; - private Map dynamicOutputJobInfo; - - - private final List partColsToDel; - private final List dynamicPartCols; - private int maxDynamicPartitions; - - private OutputJobInfo jobInfo; - private TaskAttemptContext context; - - /** - * @param baseWriter RecordWriter to contain - * @param context current TaskAttemptContext - * @throws IOException - * @throws InterruptedException - */ - public FileRecordWriterContainer(org.apache.hadoop.mapred.RecordWriter, ? super Writable> baseWriter, - TaskAttemptContext context) throws IOException, InterruptedException { - super(context, baseWriter); - this.context = context; - jobInfo = HCatOutputFormat.getJobInfo(context); - - storageHandler = HCatUtil.getStorageHandler(context.getConfiguration(), jobInfo.getTableInfo().getStorerInfo()); - serDe = ReflectionUtils.newInstance(storageHandler.getSerDeClass(), context.getConfiguration()); - objectInspector = InternalUtil.createStructObjectInspector(jobInfo.getOutputSchema()); - try { - InternalUtil.initializeOutputSerDe(serDe, context.getConfiguration(), jobInfo); - } catch (SerDeException e) { - throw new IOException("Failed to inialize SerDe", e); - } - - // If partition columns occur in data, we want to remove them. 
- partColsToDel = jobInfo.getPosOfPartCols(); - dynamicPartitioningUsed = jobInfo.isDynamicPartitioningUsed(); - dynamicPartCols = jobInfo.getPosOfDynPartCols(); - maxDynamicPartitions = jobInfo.getMaxDynamicPartitions(); - - if ((partColsToDel == null) || (dynamicPartitioningUsed && (dynamicPartCols == null))) { - throw new HCatException("It seems that setSchema() is not called on " + - "HCatOutputFormat. Please make sure that method is called."); - } - - - if (!dynamicPartitioningUsed) { - this.baseDynamicSerDe = null; - this.baseDynamicWriters = null; - this.baseDynamicCommitters = null; - this.dynamicContexts = null; - this.dynamicObjectInspectors = null; - this.dynamicOutputJobInfo = null; - } else { - this.baseDynamicSerDe = new HashMap(); - this.baseDynamicWriters = new HashMap, ? super Writable>>(); - this.baseDynamicCommitters = new HashMap(); - this.dynamicContexts = new HashMap(); - this.dynamicObjectInspectors = new HashMap(); - this.dynamicOutputJobInfo = new HashMap(); - } - } - - /** - * @return the storagehandler - */ - public HCatStorageHandler getStorageHandler() { - return storageHandler; - } - - @Override - public void close(TaskAttemptContext context) throws IOException, - InterruptedException { - Reporter reporter = InternalUtil.createReporter(context); - if (dynamicPartitioningUsed) { - for (org.apache.hadoop.mapred.RecordWriter, ? 
super Writable> bwriter : baseDynamicWriters.values()) { - //We are in RecordWriter.close() make sense that the context would be TaskInputOutput - bwriter.close(reporter); - } - for (Map.Entry entry : baseDynamicCommitters.entrySet()) { - org.apache.hadoop.mapred.TaskAttemptContext currContext = dynamicContexts.get(entry.getKey()); - OutputCommitter baseOutputCommitter = entry.getValue(); - if (baseOutputCommitter.needsTaskCommit(currContext)) { - baseOutputCommitter.commitTask(currContext); - } - } - } else { - getBaseRecordWriter().close(reporter); - } - } - - @Override - public void write(WritableComparable key, HCatRecord value) throws IOException, - InterruptedException { - - org.apache.hadoop.mapred.RecordWriter localWriter; - ObjectInspector localObjectInspector; - SerDe localSerDe; - OutputJobInfo localJobInfo = null; - - if (dynamicPartitioningUsed) { - // calculate which writer to use from the remaining values - this needs to be done before we delete cols - List dynamicPartValues = new ArrayList(); - for (Integer colToAppend : dynamicPartCols) { - dynamicPartValues.add(value.get(colToAppend).toString()); - } - - String dynKey = dynamicPartValues.toString(); - if (!baseDynamicWriters.containsKey(dynKey)) { - if ((maxDynamicPartitions != -1) && (baseDynamicWriters.size() > maxDynamicPartitions)) { - throw new HCatException(ErrorType.ERROR_TOO_MANY_DYNAMIC_PTNS, - "Number of dynamic partitions being created " - + "exceeds configured max allowable partitions[" - + maxDynamicPartitions - + "], increase parameter [" - + HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTS.varname - + "] if needed."); - } - - org.apache.hadoop.mapred.TaskAttemptContext currTaskContext = HCatMapRedUtil.createTaskAttemptContext(context); - configureDynamicStorageHandler(currTaskContext, dynamicPartValues); - localJobInfo = HCatBaseOutputFormat.getJobInfo(currTaskContext); - - //setup serDe - SerDe currSerDe = ReflectionUtils.newInstance(storageHandler.getSerDeClass(), 
currTaskContext.getJobConf()); - try { - InternalUtil.initializeOutputSerDe(currSerDe, currTaskContext.getConfiguration(), localJobInfo); - } catch (SerDeException e) { - throw new IOException("Failed to initialize SerDe", e); - } - - //create base OutputFormat - org.apache.hadoop.mapred.OutputFormat baseOF = - ReflectionUtils.newInstance(storageHandler.getOutputFormatClass(), currTaskContext.getJobConf()); - - //We are skipping calling checkOutputSpecs() for each partition - //As it can throw a FileAlreadyExistsException when more than one mapper is writing to a partition - //See HCATALOG-490, also to avoid contacting the namenode for each new FileOutputFormat instance - //In general this should be ok for most FileOutputFormat implementations - //but may become an issue for cases when the method is used to perform other setup tasks - - //get Output Committer - org.apache.hadoop.mapred.OutputCommitter baseOutputCommitter = currTaskContext.getJobConf().getOutputCommitter(); - //create currJobContext the latest so it gets all the config changes - org.apache.hadoop.mapred.JobContext currJobContext = HCatMapRedUtil.createJobContext(currTaskContext); - //setupJob() - baseOutputCommitter.setupJob(currJobContext); - //recreate to refresh jobConf of currTask context - currTaskContext = - HCatMapRedUtil.createTaskAttemptContext(currJobContext.getJobConf(), - currTaskContext.getTaskAttemptID(), - currTaskContext.getProgressible()); - //set temp location - currTaskContext.getConfiguration().set("mapred.work.output.dir", - new FileOutputCommitter(new Path(localJobInfo.getLocation()), currTaskContext).getWorkPath().toString()); - //setupTask() - baseOutputCommitter.setupTask(currTaskContext); - - Path parentDir = new Path(currTaskContext.getConfiguration().get("mapred.work.output.dir")); - Path childPath = new Path(parentDir,FileOutputFormat.getUniqueFile(currTaskContext, "part", "")); - - org.apache.hadoop.mapred.RecordWriter baseRecordWriter = - baseOF.getRecordWriter( - 
parentDir.getFileSystem(currTaskContext.getConfiguration()), - currTaskContext.getJobConf(), - childPath.toString(), - InternalUtil.createReporter(currTaskContext)); - - baseDynamicWriters.put(dynKey, baseRecordWriter); - baseDynamicSerDe.put(dynKey, currSerDe); - baseDynamicCommitters.put(dynKey, baseOutputCommitter); - dynamicContexts.put(dynKey, currTaskContext); - dynamicObjectInspectors.put(dynKey, InternalUtil.createStructObjectInspector(jobInfo.getOutputSchema())); - dynamicOutputJobInfo.put(dynKey, HCatOutputFormat.getJobInfo(dynamicContexts.get(dynKey))); - } - - localJobInfo = dynamicOutputJobInfo.get(dynKey); - localWriter = baseDynamicWriters.get(dynKey); - localSerDe = baseDynamicSerDe.get(dynKey); - localObjectInspector = dynamicObjectInspectors.get(dynKey); - } else { - localJobInfo = jobInfo; - localWriter = getBaseRecordWriter(); - localSerDe = serDe; - localObjectInspector = objectInspector; - } - - for (Integer colToDel : partColsToDel) { - value.remove(colToDel); - } - - - //The key given by user is ignored - try { - localWriter.write(NullWritable.get(), localSerDe.serialize(value.getAll(), localObjectInspector)); - } catch (SerDeException e) { - throw new IOException("Failed to serialize object", e); - } - } - - protected void configureDynamicStorageHandler(JobContext context, List dynamicPartVals) throws IOException { - HCatOutputFormat.configureOutputStorageHandler(context, dynamicPartVals); - } - -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/FosterStorageHandler.java hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/FosterStorageHandler.java deleted file mode 100644 index a2f6419..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/FosterStorageHandler.java +++ /dev/null @@ -1,186 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.mapreduce; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.common.FileUtils; -import org.apache.hadoop.hive.metastore.HiveMetaHook; -import org.apache.hadoop.hive.ql.io.RCFile; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.plan.TableDesc; -import org.apache.hadoop.hive.ql.security.authorization.DefaultHiveAuthorizationProvider; -import org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider; -import org.apache.hadoop.hive.serde2.SerDe; -import org.apache.hadoop.mapred.InputFormat; -import org.apache.hadoop.mapred.OutputFormat; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.common.HCatUtil; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; - -/** - * This class is used to encapsulate the InputFormat, OutputFormat and SerDe - * artifacts of tables which don't define a SerDe. This StorageHandler assumes - * the supplied storage artifacts are for a file-based storage system. 
- */ -public class FosterStorageHandler extends HCatStorageHandler { - - public Configuration conf; - /** The directory under which data is initially written for a partitioned table */ - protected static final String DYNTEMP_DIR_NAME = "_DYN"; - - /** The directory under which data is initially written for a non partitioned table */ - protected static final String TEMP_DIR_NAME = "_TEMP"; - - private Class ifClass; - private Class ofClass; - private Class serDeClass; - - public FosterStorageHandler(String ifName, String ofName, String serdeName) throws ClassNotFoundException { - this((Class) Class.forName(ifName), - (Class) Class.forName(ofName), - (Class) Class.forName(serdeName)); - } - - public FosterStorageHandler(Class ifClass, - Class ofClass, - Class serDeClass) { - this.ifClass = ifClass; - this.ofClass = ofClass; - this.serDeClass = serDeClass; - } - - @Override - public Class getInputFormatClass() { - return ifClass; //To change body of overridden methods use File | Settings | File Templates. - } - - @Override - public Class getOutputFormatClass() { - return ofClass; //To change body of overridden methods use File | Settings | File Templates. - } - - @Override - public Class getSerDeClass() { - return serDeClass; //To change body of implemented methods use File | Settings | File Templates. 
- } - - @Override - public HiveMetaHook getMetaHook() { - return null; - } - - @Override - public void configureInputJobProperties(TableDesc tableDesc, - Map jobProperties) { - - } - - @Override - public void configureOutputJobProperties(TableDesc tableDesc, - Map jobProperties) { - try { - OutputJobInfo jobInfo = (OutputJobInfo) - HCatUtil.deserialize(tableDesc.getJobProperties().get( - HCatConstants.HCAT_KEY_OUTPUT_INFO)); - String parentPath = jobInfo.getTableInfo().getTableLocation(); - String dynHash = tableDesc.getJobProperties().get( - HCatConstants.HCAT_DYNAMIC_PTN_JOBID); - - // For dynamic partitioned writes without all keyvalues specified, - // we create a temp dir for the associated write job - if (dynHash != null) { - parentPath = new Path(parentPath, - DYNTEMP_DIR_NAME + dynHash).toString(); - } - - String outputLocation; - - if ((dynHash == null) - && Boolean.valueOf((String)tableDesc.getProperties().get("EXTERNAL")) - && jobInfo.getLocation() != null && jobInfo.getLocation().length() > 0) { - // honor custom location for external table apart from what metadata specifies - // only if we're not using dynamic partitioning - see HIVE-5011 - outputLocation = jobInfo.getLocation(); - } else if (dynHash == null && jobInfo.getPartitionValues().size() == 0) { - // For non-partitioned tables, we send them to the temp dir - outputLocation = TEMP_DIR_NAME; - } else { - List cols = new ArrayList(); - List values = new ArrayList(); - - //Get the output location in the order partition keys are defined for the table. - for (String name : - jobInfo.getTableInfo(). 
- getPartitionColumns().getFieldNames()) { - String value = jobInfo.getPartitionValues().get(name); - cols.add(name); - values.add(value); - } - outputLocation = FileUtils.makePartName(cols, values); - } - - jobInfo.setLocation(new Path(parentPath, outputLocation).toString()); - - //only set output dir if partition is fully materialized - if (jobInfo.getPartitionValues().size() - == jobInfo.getTableInfo().getPartitionColumns().size()) { - jobProperties.put("mapred.output.dir", jobInfo.getLocation()); - } - - //TODO find a better home for this, RCFile specifc - jobProperties.put(RCFile.COLUMN_NUMBER_CONF_STR, - Integer.toOctalString( - jobInfo.getOutputSchema().getFields().size())); - jobProperties.put(HCatConstants.HCAT_KEY_OUTPUT_INFO, - HCatUtil.serialize(jobInfo)); - } catch (IOException e) { - throw new IllegalStateException("Failed to set output path", e); - } - - } - - @Override - OutputFormatContainer getOutputFormatContainer( - org.apache.hadoop.mapred.OutputFormat outputFormat) { - return new FileOutputFormatContainer(outputFormat); - } - - @Override - public Configuration getConf() { - return conf; - } - - @Override - public void setConf(Configuration conf) { - this.conf = conf; - } - - @Override - public HiveAuthorizationProvider getAuthorizationProvider() - throws HiveException { - return new DefaultHiveAuthorizationProvider(); - } - -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatBaseInputFormat.java hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatBaseInputFormat.java deleted file mode 100644 index bc0e04c..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatBaseInputFormat.java +++ /dev/null @@ -1,333 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.mapreduce; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.LinkedList; -import java.util.Map; -import java.util.HashMap; -import java.util.List; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapreduce.InputFormat; -import org.apache.hadoop.mapreduce.InputSplit; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.JobContext; -import org.apache.hadoop.mapreduce.RecordReader; -import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.hadoop.util.ReflectionUtils; -import org.apache.hadoop.util.StringUtils; - -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.common.HCatUtil; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.hcatalog.data.schema.HCatFieldSchema; -import org.apache.hcatalog.data.schema.HCatSchema; - -public abstract class HCatBaseInputFormat - extends InputFormat { - - /** - * get the schema for the HCatRecord data returned by HCatInputFormat. 
- * - * @param context the jobContext - * @throws IllegalArgumentException - */ - private Class inputFileFormatClass; - - // TODO needs to go in InitializeInput? as part of InputJobInfo - private static HCatSchema getOutputSchema(Configuration conf) - throws IOException { - String os = conf.get(HCatConstants.HCAT_KEY_OUTPUT_SCHEMA); - if (os == null) { - return getTableSchema(conf); - } else { - return (HCatSchema) HCatUtil.deserialize(os); - } - } - - /** - * Set the schema for the HCatRecord data returned by HCatInputFormat. - * @param job the job object - * @param hcatSchema the schema to use as the consolidated schema - */ - public static void setOutputSchema(Job job, HCatSchema hcatSchema) - throws IOException { - job.getConfiguration().set(HCatConstants.HCAT_KEY_OUTPUT_SCHEMA, - HCatUtil.serialize(hcatSchema)); - } - - protected static org.apache.hadoop.mapred.InputFormat - getMapRedInputFormat(JobConf job, Class inputFormatClass) throws IOException { - return ( - org.apache.hadoop.mapred.InputFormat) - ReflectionUtils.newInstance(inputFormatClass, job); - } - - /** - * Logically split the set of input files for the job. 
Returns the - * underlying InputFormat's splits - * @param jobContext the job context object - * @return the splits, an HCatInputSplit wrapper over the storage - * handler InputSplits - * @throws IOException or InterruptedException - */ - @Override - public List getSplits(JobContext jobContext) - throws IOException, InterruptedException { - Configuration conf = jobContext.getConfiguration(); - - //Get the job info from the configuration, - //throws exception if not initialized - InputJobInfo inputJobInfo; - try { - inputJobInfo = getJobInfo(conf); - } catch (Exception e) { - throw new IOException(e); - } - - List splits = new ArrayList(); - List partitionInfoList = inputJobInfo.getPartitions(); - if (partitionInfoList == null) { - //No partitions match the specified partition filter - return splits; - } - - HCatStorageHandler storageHandler; - JobConf jobConf; - //For each matching partition, call getSplits on the underlying InputFormat - for (PartInfo partitionInfo : partitionInfoList) { - jobConf = HCatUtil.getJobConfFromContext(jobContext); - setInputPath(jobConf, partitionInfo.getLocation()); - Map jobProperties = partitionInfo.getJobProperties(); - - HCatSchema allCols = new HCatSchema(new LinkedList()); - for (HCatFieldSchema field : - inputJobInfo.getTableInfo().getDataColumns().getFields()) - allCols.append(field); - for (HCatFieldSchema field : - inputJobInfo.getTableInfo().getPartitionColumns().getFields()) - allCols.append(field); - - HCatUtil.copyJobPropertiesToJobConf(jobProperties, jobConf); - - storageHandler = HCatUtil.getStorageHandler( - jobConf, partitionInfo); - - //Get the input format - Class inputFormatClass = storageHandler.getInputFormatClass(); - org.apache.hadoop.mapred.InputFormat inputFormat = - getMapRedInputFormat(jobConf, inputFormatClass); - - //Call getSplit on the InputFormat, create an HCatSplit for each - //underlying split. When the desired number of input splits is missing, - //use a default number (denoted by zero). 
- //TODO(malewicz): Currently each partition is split independently into - //a desired number. However, we want the union of all partitions to be - //split into a desired number while maintaining balanced sizes of input - //splits. - int desiredNumSplits = - conf.getInt(HCatConstants.HCAT_DESIRED_PARTITION_NUM_SPLITS, 0); - org.apache.hadoop.mapred.InputSplit[] baseSplits = - inputFormat.getSplits(jobConf, desiredNumSplits); - - for (org.apache.hadoop.mapred.InputSplit split : baseSplits) { - splits.add(new HCatSplit( - partitionInfo, - split, allCols)); - } - } - - return splits; - } - - /** - * Create the RecordReader for the given InputSplit. Returns the underlying - * RecordReader if the required operations are supported and schema matches - * with HCatTable schema. Returns an HCatRecordReader if operations need to - * be implemented in HCat. - * @param split the split - * @param taskContext the task attempt context - * @return the record reader instance, either an HCatRecordReader(later) or - * the underlying storage handler's RecordReader - * @throws IOException or InterruptedException - */ - @Override - public RecordReader - createRecordReader(InputSplit split, - TaskAttemptContext taskContext) throws IOException, InterruptedException { - - HCatSplit hcatSplit = InternalUtil.castToHCatSplit(split); - PartInfo partitionInfo = hcatSplit.getPartitionInfo(); - JobContext jobContext = taskContext; - Configuration conf = jobContext.getConfiguration(); - - HCatStorageHandler storageHandler = HCatUtil.getStorageHandler( - conf, partitionInfo); - - JobConf jobConf = HCatUtil.getJobConfFromContext(jobContext); - Map jobProperties = partitionInfo.getJobProperties(); - HCatUtil.copyJobPropertiesToJobConf(jobProperties, jobConf); - - Map valuesNotInDataCols = getColValsNotInDataColumns( - getOutputSchema(conf), partitionInfo - ); - - return new HCatRecordReader(storageHandler, valuesNotInDataCols); - } - - - /** - * gets values for fields requested by output schema which 
will not be in the data - */ - private static Map getColValsNotInDataColumns(HCatSchema outputSchema, - PartInfo partInfo) { - HCatSchema dataSchema = partInfo.getPartitionSchema(); - Map vals = new HashMap(); - for (String fieldName : outputSchema.getFieldNames()) { - if (dataSchema.getPosition(fieldName) == null) { - // this entry of output is not present in the output schema - // so, we first check the table schema to see if it is a part col - - if (partInfo.getPartitionValues().containsKey(fieldName)) { - vals.put(fieldName, partInfo.getPartitionValues().get(fieldName)); - } else { - vals.put(fieldName, null); - } - } - } - return vals; - } - - /** - * @see org.apache.hcatalog.mapreduce.HCatBaseInputFormat#getTableSchema(org.apache.hadoop.conf.Configuration) - * @deprecated Use {@link #getTableSchema(org.apache.hadoop.conf.Configuration)} - */ - public static HCatSchema getTableSchema(JobContext context) - throws IOException { - return getTableSchema(context.getConfiguration()); - } - - - /** - * Gets the HCatTable schema for the table specified in the HCatInputFormat.setInput call - * on the specified job context. This information is available only after HCatInputFormat.setInput - * has been called for a JobContext. - * @param conf the Configuration object - * @return the table schema - * @throws IOException if HCatInputFormat.setInput has not been called - * for the current context - */ - public static HCatSchema getTableSchema(Configuration conf) - throws IOException { - InputJobInfo inputJobInfo = getJobInfo(conf); - HCatSchema allCols = new HCatSchema(new LinkedList()); - for (HCatFieldSchema field : - inputJobInfo.getTableInfo().getDataColumns().getFields()) - allCols.append(field); - for (HCatFieldSchema field : - inputJobInfo.getTableInfo().getPartitionColumns().getFields()) - allCols.append(field); - return allCols; - } - - /** - * Gets the InputJobInfo object by reading the Configuration and deserializing - * the string. 
If InputJobInfo is not present in the configuration, throws an - * exception since that means HCatInputFormat.setInput has not been called. - * @param conf the Configuration object - * @return the InputJobInfo object - * @throws IOException the exception - */ - private static InputJobInfo getJobInfo(Configuration conf) - throws IOException { - String jobString = conf.get( - HCatConstants.HCAT_KEY_JOB_INFO); - if (jobString == null) { - throw new IOException("job information not found in JobContext." - + " HCatInputFormat.setInput() not called?"); - } - - return (InputJobInfo) HCatUtil.deserialize(jobString); - } - - private void setInputPath(JobConf jobConf, String location) - throws IOException { - - // ideally we should just call FileInputFormat.setInputPaths() here - but - // that won't work since FileInputFormat.setInputPaths() needs - // a Job object instead of a JobContext which we are handed here - - int length = location.length(); - int curlyOpen = 0; - int pathStart = 0; - boolean globPattern = false; - List pathStrings = new ArrayList(); - - for (int i = 0; i < length; i++) { - char ch = location.charAt(i); - switch (ch) { - case '{': { - curlyOpen++; - if (!globPattern) { - globPattern = true; - } - break; - } - case '}': { - curlyOpen--; - if (curlyOpen == 0 && globPattern) { - globPattern = false; - } - break; - } - case ',': { - if (!globPattern) { - pathStrings.add(location.substring(pathStart, i)); - pathStart = i + 1; - } - break; - } - } - } - pathStrings.add(location.substring(pathStart, length)); - - Path[] paths = StringUtils.stringToPath(pathStrings.toArray(new String[0])); - String separator = ""; - StringBuilder str = new StringBuilder(); - - for (Path path : paths) { - FileSystem fs = path.getFileSystem(jobConf); - final String qualifiedPath = fs.makeQualified(path).toString(); - str.append(separator) - .append(StringUtils.escapeString(qualifiedPath)); - separator = StringUtils.COMMA_STR; - } - - jobConf.set("mapred.input.dir", 
str.toString()); - } - -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatBaseOutputFormat.java hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatBaseOutputFormat.java deleted file mode 100644 index e90d6f6..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatBaseOutputFormat.java +++ /dev/null @@ -1,243 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.mapreduce; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.ql.metadata.Table; -import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.mapreduce.JobContext; -import org.apache.hadoop.mapreduce.OutputFormat; -import org.apache.hadoop.util.ReflectionUtils; -import org.apache.hcatalog.common.ErrorType; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.common.HCatException; -import org.apache.hcatalog.common.HCatUtil; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.hcatalog.data.schema.HCatSchema; - -public abstract class HCatBaseOutputFormat extends OutputFormat, HCatRecord> { - -// static final private Log LOG = LogFactory.getLog(HCatBaseOutputFormat.class); - - /** - * @see org.apache.hcatalog.mapreduce.HCatBaseOutputFormat#getTableSchema(org.apache.hadoop.conf.Configuration) - * @deprecated Use {@link #getTableSchema(org.apache.hadoop.conf.Configuration)} - */ - public static HCatSchema getTableSchema(JobContext context) throws IOException { - return getTableSchema(context.getConfiguration()); - } - - /** - * Gets the table schema for the table specified in the HCatOutputFormat.setOutput call - * on the specified job context. - * @param conf the Configuration object - * @return the table schema - * @throws IOException if HCatOutputFormat.setOutput has not been called for the passed context - */ - public static HCatSchema getTableSchema(Configuration conf) throws IOException { - OutputJobInfo jobInfo = getJobInfo(conf); - return jobInfo.getTableInfo().getDataColumns(); - } - - /** - * Check for validity of the output-specification for the job. 
- * @param context information about the job - * @throws IOException when output should not be attempted - */ - @Override - public void checkOutputSpecs(JobContext context - ) throws IOException, InterruptedException { - getOutputFormat(context).checkOutputSpecs(context); - } - - /** - * Gets the output format instance. - * @param context the job context - * @return the output format instance - * @throws IOException - */ - protected OutputFormat, HCatRecord> getOutputFormat(JobContext context) throws IOException { - OutputJobInfo jobInfo = getJobInfo(context); - HCatStorageHandler storageHandler = HCatUtil.getStorageHandler(context.getConfiguration(), jobInfo.getTableInfo().getStorerInfo()); - //why do we need this? - configureOutputStorageHandler(context); - return storageHandler.getOutputFormatContainer(ReflectionUtils.newInstance(storageHandler.getOutputFormatClass(), context.getConfiguration())); - } - - /** - * @see org.apache.hcatalog.mapreduce.HCatBaseOutputFormat#getJobInfo(org.apache.hadoop.conf.Configuration) - * @deprecated use {@link #getJobInfo(org.apache.hadoop.conf.Configuration)} - */ - public static OutputJobInfo getJobInfo(JobContext jobContext) throws IOException { - return getJobInfo(jobContext.getConfiguration()); - } - - /** - * Gets the HCatOuputJobInfo object by reading the Configuration and deserializing - * the string. If InputJobInfo is not present in the configuration, throws an - * exception since that means HCatOutputFormat.setOutput has not been called. 
- * @param conf the job Configuration object - * @return the OutputJobInfo object - * @throws IOException the IO exception - */ - public static OutputJobInfo getJobInfo(Configuration conf) throws IOException { - String jobString = conf.get(HCatConstants.HCAT_KEY_OUTPUT_INFO); - if (jobString == null) { - throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED); - } - - return (OutputJobInfo) HCatUtil.deserialize(jobString); - } - - /** - * Configure the output storage handler - * @param jobContext the job context - * @throws IOException - */ - @SuppressWarnings("unchecked") - static void configureOutputStorageHandler( - JobContext jobContext) throws IOException { - configureOutputStorageHandler(jobContext, (List) null); - } - - /** - * Configure the output storage handler with allowing specification of missing dynamic partvals - * @param jobContext the job context - * @param dynamicPartVals - * @throws IOException - */ - @SuppressWarnings("unchecked") - static void configureOutputStorageHandler( - JobContext jobContext, List dynamicPartVals) throws IOException { - Configuration conf = jobContext.getConfiguration(); - try { - OutputJobInfo jobInfo = (OutputJobInfo) HCatUtil.deserialize(conf.get(HCatConstants.HCAT_KEY_OUTPUT_INFO)); - HCatStorageHandler storageHandler = HCatUtil.getStorageHandler(conf, jobInfo.getTableInfo().getStorerInfo()); - - Map partitionValues = jobInfo.getPartitionValues(); - String location = jobInfo.getLocation(); - - if (dynamicPartVals != null) { - // dynamic part vals specified - List dynamicPartKeys = jobInfo.getDynamicPartitioningKeys(); - if (dynamicPartVals.size() != dynamicPartKeys.size()) { - throw new HCatException(ErrorType.ERROR_INVALID_PARTITION_VALUES, - "Unable to configure dynamic partitioning for storage handler, mismatch between" - + " number of partition values obtained[" + dynamicPartVals.size() - + "] and number of partition values required[" + dynamicPartKeys.size() + "]"); - } - for (int i = 0; i < 
dynamicPartKeys.size(); i++) { - partitionValues.put(dynamicPartKeys.get(i), dynamicPartVals.get(i)); - } - -// // re-home location, now that we know the rest of the partvals -// Table table = jobInfo.getTableInfo().getTable(); -// -// List partitionCols = new ArrayList(); -// for(FieldSchema schema : table.getPartitionKeys()) { -// partitionCols.add(schema.getName()); -// } - jobInfo.setPartitionValues(partitionValues); - } - - HCatUtil.configureOutputStorageHandler(storageHandler, conf, jobInfo); - } catch (Exception e) { - if (e instanceof HCatException) { - throw (HCatException) e; - } else { - throw new HCatException(ErrorType.ERROR_INIT_STORAGE_HANDLER, e); - } - } - } - - /** - * Configure the output storage handler, with allowing specification - * of partvals from which it picks the dynamic partvals - * @param context the job context - * @param jobInfo the output job info - * @param fullPartSpec - * @throws IOException - */ - - protected static void configureOutputStorageHandler( - JobContext context, OutputJobInfo jobInfo, - Map fullPartSpec) throws IOException { - List dynamicPartKeys = jobInfo.getDynamicPartitioningKeys(); - if ((dynamicPartKeys == null) || (dynamicPartKeys.isEmpty())) { - configureOutputStorageHandler(context, (List) null); - } else { - List dynKeyVals = new ArrayList(); - for (String dynamicPartKey : dynamicPartKeys) { - dynKeyVals.add(fullPartSpec.get(dynamicPartKey)); - } - configureOutputStorageHandler(context, dynKeyVals); - } - } - - - protected static void setPartDetails(OutputJobInfo jobInfo, final HCatSchema schema, - Map partMap) throws HCatException, IOException { - List posOfPartCols = new ArrayList(); - List posOfDynPartCols = new ArrayList(); - - // If partition columns occur in data, we want to remove them. - // So, find out positions of partition columns in schema provided by user. - // We also need to update the output Schema with these deletions. 
- - // Note that, output storage handlers never sees partition columns in data - // or schema. - - HCatSchema schemaWithoutParts = new HCatSchema(schema.getFields()); - for (String partKey : partMap.keySet()) { - Integer idx; - if ((idx = schema.getPosition(partKey)) != null) { - posOfPartCols.add(idx); - schemaWithoutParts.remove(schema.get(partKey)); - } - } - - // Also, if dynamic partitioning is being used, we want to - // set appropriate list of columns for the columns to be dynamically specified. - // These would be partition keys too, so would also need to be removed from - // output schema and partcols - - if (jobInfo.isDynamicPartitioningUsed()) { - for (String partKey : jobInfo.getDynamicPartitioningKeys()) { - Integer idx; - if ((idx = schema.getPosition(partKey)) != null) { - posOfPartCols.add(idx); - posOfDynPartCols.add(idx); - schemaWithoutParts.remove(schema.get(partKey)); - } - } - } - - HCatUtil.validatePartitionSchema( - new Table(jobInfo.getTableInfo().getTable()), schemaWithoutParts); - jobInfo.setPosOfPartCols(posOfPartCols); - jobInfo.setPosOfDynPartCols(posOfDynPartCols); - jobInfo.setOutputSchema(schemaWithoutParts); - } -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatEximInputFormat.java.broken hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatEximInputFormat.java.broken deleted file mode 100644 index 71b9652..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatEximInputFormat.java.broken +++ /dev/null @@ -1,141 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.mapreduce; - -import java.io.IOException; -import java.net.URI; -import java.net.URISyntaxException; -import java.util.ArrayList; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.Properties; - -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.metastore.api.Partition; -import org.apache.hadoop.hive.ql.parse.EximUtil; -import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.common.HCatUtil; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.apache.hcatalog.data.schema.HCatSchemaUtils; - -/** The InputFormat to use to read data from HCat */ -public class HCatEximInputFormat extends HCatBaseInputFormat { - - /** - * Set the input to use for the Job. This queries the metadata file with - * the specified partition predicates, gets the matching partitions, puts - * the information in the conf object. 
The inputInfo object is updated with - * information needed in the client context - * - * @param job the job object - * @return two hcat schemas, for the table columns and the partition keys - * @throws IOException - * the exception in communicating with the metadata server - */ - public static List setInput(Job job, - String location, - Map partitionFilter) throws IOException { - FileSystem fs; - try { - fs = FileSystem.get(new URI(location), job.getConfiguration()); - } catch (URISyntaxException e) { - throw new IOException(e); - } - Path fromPath = new Path(location); - Path metadataPath = new Path(fromPath, "_metadata"); - try { - Map.Entry> tp = EximUtil - .readMetaData(fs, metadataPath); - org.apache.hadoop.hive.metastore.api.Table table = tp.getKey(); - InputJobInfo inputInfo = InputJobInfo.create(table.getDbName(), table.getTableName(),null,null,null); - List partCols = table.getPartitionKeys(); - List partInfoList = null; - if (partCols.size() > 0) { - List partColNames = new ArrayList(partCols.size()); - for (FieldSchema fsc : partCols) { - partColNames.add(fsc.getName()); - } - List partitions = tp.getValue(); - partInfoList = filterPartitions(partitionFilter, partitions, table.getPartitionKeys()); - } else { - partInfoList = new ArrayList(1); - HCatSchema schema = new HCatSchema(HCatUtil.getHCatFieldSchemaList(table.getSd().getCols())); - Map parameters = table.getParameters(); - String inputStorageDriverClass = null; - if (parameters.containsKey(HCatConstants.HCAT_ISD_CLASS)){ - inputStorageDriverClass = parameters.get(HCatConstants.HCAT_ISD_CLASS); - }else{ - throw new IOException("No input storage driver classname found, cannot read partition"); - } - Properties hcatProperties = new Properties(); - for (String key : parameters.keySet()){ - if (key.startsWith(InitializeInput.HCAT_KEY_PREFIX)){ - hcatProperties.put(key, parameters.get(key)); - } - } - PartInfo partInfo = new PartInfo(schema, inputStorageDriverClass, location + "/data", hcatProperties); 
- partInfoList.add(partInfo); - } - inputInfo.setPartitions(partInfoList); - inputInfo.setTableInfo(HCatTableInfo.valueOf(table)); - job.getConfiguration().set( - HCatConstants.HCAT_KEY_JOB_INFO, - HCatUtil.serialize(inputInfo)); - List rv = new ArrayList(2); - rv.add(HCatSchemaUtils.getHCatSchema(table.getSd().getCols())); - rv.add(HCatSchemaUtils.getHCatSchema(partCols)); - return rv; - } catch(SemanticException e) { - throw new IOException(e); - } - } - - private static List filterPartitions(Map partitionFilter, - List partitions, List partCols) throws IOException { - List partInfos = new LinkedList(); - for (Partition partition : partitions) { - boolean matches = true; - List partVals = partition.getValues(); - assert partCols.size() == partVals.size(); - Map partSpec = EximUtil.makePartSpec(partCols, partVals); - if (partitionFilter != null) { - for (Map.Entry constraint : partitionFilter.entrySet()) { - String partVal = partSpec.get(constraint.getKey()); - if ((partVal == null) || !partVal.equals(constraint.getValue())) { - matches = false; - break; - } - } - } - if (matches) { - PartInfo partInfo = InitializeInput.extractPartInfo(partition.getSd(), - partition.getParameters()); - partInfo.setPartitionValues(partSpec); - partInfos.add(partInfo); - } - } - return partInfos; - } -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatEximOutputCommitter.java.broken hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatEximOutputCommitter.java.broken deleted file mode 100644 index 0ab8c22..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatEximOutputCommitter.java.broken +++ /dev/null @@ -1,166 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.mapreduce; - -import java.io.IOException; -import java.net.URI; -import java.net.URISyntaxException; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.metastore.Warehouse; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.api.Partition; -import org.apache.hadoop.hive.metastore.api.StorageDescriptor; -import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.parse.EximUtil; -import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.mapreduce.JobContext; -import org.apache.hadoop.mapreduce.JobStatus; -import org.apache.hadoop.mapreduce.OutputCommitter; -import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.hcatalog.common.ErrorType; -import org.apache.hcatalog.common.HCatException; - -public class HCatEximOutputCommitter extends OutputCommitter { - - private static final Log LOG = LogFactory.getLog(HCatEximOutputCommitter.class); - - private final OutputCommitter baseCommitter; - - public 
HCatEximOutputCommitter(JobContext context, OutputCommitter baseCommitter) { - this.baseCommitter = baseCommitter; - } - - @Override - public void abortTask(TaskAttemptContext context) throws IOException { - baseCommitter.abortTask(context); - } - - @Override - public void commitTask(TaskAttemptContext context) throws IOException { - baseCommitter.commitTask(context); - } - - @Override - public boolean needsTaskCommit(TaskAttemptContext context) throws IOException { - return baseCommitter.needsTaskCommit(context); - } - - @Override - public void setupJob(JobContext context) throws IOException { - if( baseCommitter != null ) { - baseCommitter.setupJob(context); - } - } - - @Override - public void setupTask(TaskAttemptContext context) throws IOException { - baseCommitter.setupTask(context); - } - - @Override - public void abortJob(JobContext jobContext, JobStatus.State state) throws IOException { - if(baseCommitter != null) { - baseCommitter.abortJob(jobContext, state); - } - OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(jobContext); - - Path src = new Path(jobInfo.getLocation()); - FileSystem fs = src.getFileSystem(jobContext.getConfiguration()); - fs.delete(src, true); - } - - @Override - public void commitJob(JobContext jobContext) throws IOException { - if(baseCommitter != null) { - baseCommitter.commitJob(jobContext); - } - } - - @Override - public void cleanupJob(JobContext jobContext) throws IOException { - LOG.info("HCatEximOutputCommitter.cleanup invoked; m.o.d : " + - jobContext.getConfiguration().get("mapred.output.dir")); - if (baseCommitter != null) { - LOG.info("baseCommitter.class = " + baseCommitter.getClass().getName()); - baseCommitter.cleanupJob(jobContext); - } - - OutputJobInfo jobInfo = HCatBaseOutputFormat.getJobInfo(jobContext); - Configuration conf = jobContext.getConfiguration(); - FileSystem fs; - try { - fs = FileSystem.get(new URI(jobInfo.getTableInfo().getTable().getSd().getLocation()), conf); - } catch (URISyntaxException e) { - 
throw new IOException(e); - } - doCleanup(jobInfo, fs); - } - - private static void doCleanup(OutputJobInfo jobInfo, FileSystem fs) throws IOException, - HCatException { - try { - Table ttable = jobInfo.getTableInfo().getTable(); - org.apache.hadoop.hive.ql.metadata.Table table = new org.apache.hadoop.hive.ql.metadata.Table( - ttable); - StorageDescriptor tblSD = ttable.getSd(); - Path tblPath = new Path(tblSD.getLocation()); - Path path = new Path(tblPath, "_metadata"); - List tpartitions = null; - try { - Map.Entry> rv = EximUtil - .readMetaData(fs, path); - tpartitions = rv.getValue(); - } catch (IOException e) { - } - List partitions = - new ArrayList(); - if (tpartitions != null) { - for (Partition tpartition : tpartitions) { - partitions.add(new org.apache.hadoop.hive.ql.metadata.Partition(table, tpartition)); - } - } - if (!table.getPartitionKeys().isEmpty()) { - Map partitionValues = jobInfo.getPartitionValues(); - org.apache.hadoop.hive.ql.metadata.Partition partition = - new org.apache.hadoop.hive.ql.metadata.Partition(table, - partitionValues, - new Path(tblPath, Warehouse.makePartPath(partitionValues))); - partition.getTPartition().setParameters(table.getParameters()); - partitions.add(partition); - } - EximUtil.createExportDump(fs, path, (table), partitions); - } catch (SemanticException e) { - throw new HCatException(ErrorType.ERROR_PUBLISHING_PARTITION, e); - } catch (HiveException e) { - throw new HCatException(ErrorType.ERROR_PUBLISHING_PARTITION, e); - } catch (MetaException e) { - throw new HCatException(ErrorType.ERROR_PUBLISHING_PARTITION, e); - } - } -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatEximOutputFormat.java.broken hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatEximOutputFormat.java.broken deleted file mode 100644 index 6181284..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatEximOutputFormat.java.broken +++ /dev/null @@ -1,176 +0,0 @@ -/** - * Licensed to the 
Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.mapreduce; - -import java.io.IOException; -import java.util.List; -import java.util.Map; -import java.util.Properties; -import java.util.TreeMap; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.metastore.Warehouse; -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.api.SerDeInfo; -import org.apache.hadoop.hive.metastore.api.StorageDescriptor; -import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.hadoop.hive.ql.io.RCFileInputFormat; -import org.apache.hadoop.hive.ql.io.RCFileOutputFormat; -import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe; -import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.JobContext; -import org.apache.hadoop.mapreduce.OutputCommitter; -import org.apache.hadoop.mapreduce.RecordWriter; -import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.hcatalog.common.ErrorType; -import 
org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.common.HCatException; -import org.apache.hcatalog.common.HCatUtil; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.hcatalog.data.schema.HCatFieldSchema; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.apache.hcatalog.data.schema.HCatSchemaUtils; -import org.apache.hcatalog.rcfile.RCFileInputDriver; -import org.apache.hcatalog.rcfile.RCFileOutputDriver; - -/** - * The OutputFormat to use to write data to HCat without a hcat server. This can then - * be imported into a hcat instance, or used with a HCatEximInputFormat. As in - * HCatOutputFormat, the key value is ignored and - * and should be given as null. The value is the HCatRecord to write. - */ -public class HCatEximOutputFormat extends HCatBaseOutputFormat { - - private static final Log LOG = LogFactory.getLog(HCatEximOutputFormat.class); - - /** - * Get the record writer for the job. Uses the Table's default OutputStorageDriver - * to get the record writer. - * - * @param context - * the information about the current task. - * @return a RecordWriter to write the output for the job. - * @throws IOException - */ - @Override - public RecordWriter, HCatRecord> - getRecordWriter(TaskAttemptContext context - ) throws IOException, InterruptedException { - return getOutputFormat(context).getRecordWriter(context); - } - - /** - * Get the output committer for this output format. This is responsible - * for ensuring the output is committed correctly. 
- * @param context the task context - * @return an output committer - * @throws IOException - * @throws InterruptedException - */ - @Override - public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException, InterruptedException { - return new HCatEximOutputCommitter(context,((OutputCommitterContainer)getOutputFormat(context).getOutputCommitter(context)).getBaseOutputCommitter()); - } - - /** - * Check for validity of the output-specification for the job. - * @param context information about the job - * @throws IOException when output should not be attempted - */ - @Override - public void checkOutputSpecs(JobContext context - ) throws IOException, InterruptedException { - ((OutputFormatContainer)getOutputFormat(context)).getBaseOutputFormat().checkOutputSpecs(context); - } - - public static void setOutput(Job job, String dbname, String tablename, String location, - HCatSchema partitionSchema, List partitionValues, HCatSchema columnSchema) throws HCatException { - setOutput(job, dbname, tablename, location, partitionSchema, partitionValues, columnSchema, - RCFileInputDriver.class.getName(), - RCFileOutputDriver.class.getName(), - RCFileInputFormat.class.getName(), - RCFileOutputFormat.class.getName(), - ColumnarSerDe.class.getName()); - } - - @SuppressWarnings("unchecked") - public static void setOutput(Job job, String dbname, String tablename, String location, - HCatSchema partitionSchema, - List partitionValues, - HCatSchema columnSchema, - String isdname, String osdname, - String ifname, String ofname, - String serializationLib) throws HCatException { - Map partSpec = new TreeMap(); - List partKeys = null; - if (partitionSchema != null) { - partKeys = partitionSchema.getFields(); - if (partKeys.size() != partitionValues.size()) { - throw new IllegalArgumentException("Partition key size differs from partition value size"); - } - for (int i = 0; i < partKeys.size(); ++i) { - HCatFieldSchema partKey = partKeys.get(i); - if 
(partKey.getType() != HCatFieldSchema.Type.STRING) { - throw new IllegalArgumentException("Partition key type string is only supported"); - } - partSpec.put(partKey.getName(), partitionValues.get(i)); - } - } - StorerInfo storerInfo = new StorerInfo(isdname, osdname, new Properties()); - OutputJobInfo outputJobInfo = OutputJobInfo.create(dbname,tablename,partSpec,null,null); - org.apache.hadoop.hive.ql.metadata.Table tbl = new - org.apache.hadoop.hive.ql.metadata.Table(dbname, tablename); - Table table = tbl.getTTable(); - table.getParameters().put(HCatConstants.HCAT_ISD_CLASS, isdname); - table.getParameters().put(HCatConstants.HCAT_OSD_CLASS, osdname); - try { - String partname = null; - if ((partKeys != null) && !partKeys.isEmpty()) { - List partSchema = HCatSchemaUtils.getFieldSchemas(partKeys); - table.setPartitionKeys(partSchema); - partname = Warehouse.makePartName(partSchema, partitionValues); - } else { - partname = "data"; - } - StorageDescriptor sd = table.getSd(); - sd.setLocation(location); - String dataLocation = location + "/" + partname; - outputJobInfo.setTableInfo(new HCatTableInfo(dbname,tablename,columnSchema,null,storerInfo,table)); - outputJobInfo.setOutputSchema(columnSchema); - outputJobInfo.setLocation(dataLocation); - setPartDetails(outputJobInfo, columnSchema, partSpec); - sd.setCols(HCatUtil.getFieldSchemaList(outputJobInfo.getOutputSchema().getFields())); - sd.setInputFormat(ifname); - sd.setOutputFormat(ofname); - SerDeInfo serdeInfo = sd.getSerdeInfo(); - serdeInfo.setSerializationLib(serializationLib); - Configuration conf = job.getConfiguration(); - conf.set(HCatConstants.HCAT_KEY_OUTPUT_INFO, HCatUtil.serialize(outputJobInfo)); - } catch (IOException e) { - throw new HCatException(ErrorType.ERROR_SET_OUTPUT, e); - } catch (MetaException e) { - throw new HCatException(ErrorType.ERROR_SET_OUTPUT, e); - } - } -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatInputFormat.java 
hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatInputFormat.java deleted file mode 100644 index 786e375..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatInputFormat.java +++ /dev/null @@ -1,136 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.mapreduce; - -import java.io.IOException; -import java.util.Properties; - -import com.google.common.base.Preconditions; -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.mapreduce.Job; - -/** - * The InputFormat to use to read data from HCatalog. 
- */ -@InterfaceAudience.Public -@InterfaceStability.Evolving -public class HCatInputFormat extends HCatBaseInputFormat { - - private Configuration conf; - private InputJobInfo inputJobInfo; - - /** - * @deprecated as of release 0.5, and will be removed in a future release - */ - @Deprecated - public static void setInput(Job job, InputJobInfo inputJobInfo) throws IOException { - setInput(job.getConfiguration(), inputJobInfo); - } - - /** - * @deprecated as of release 0.5, and will be removed in a future release - */ - @Deprecated - public static void setInput(Configuration conf, InputJobInfo inputJobInfo) throws IOException { - setInput(conf, inputJobInfo.getDatabaseName(), inputJobInfo.getTableName()) - .setFilter(inputJobInfo.getFilter()) - .setProperties(inputJobInfo.getProperties()); - } - - /** - * See {@link #setInput(org.apache.hadoop.conf.Configuration, String, String)} - */ - public static HCatInputFormat setInput(Job job, String dbName, String tableName) throws IOException { - return setInput(job.getConfiguration(), dbName, tableName); - } - - /** - * Set inputs to use for the job. This queries the metastore with the given input - * specification and serializes matching partitions into the job conf for use by MR tasks. 
- * @param conf the job configuration - * @param dbName database name, which if null 'default' is used - * @param tableName table name - * @throws IOException on all errors - */ - public static HCatInputFormat setInput(Configuration conf, String dbName, String tableName) - throws IOException { - - Preconditions.checkNotNull(conf, "required argument 'conf' is null"); - Preconditions.checkNotNull(tableName, "required argument 'tableName' is null"); - - HCatInputFormat hCatInputFormat = new HCatInputFormat(); - hCatInputFormat.conf = conf; - hCatInputFormat.inputJobInfo = InputJobInfo.create(dbName, tableName, null, null); - - try { - InitializeInput.setInput(conf, hCatInputFormat.inputJobInfo); - } catch (Exception e) { - throw new IOException(e); - } - - return hCatInputFormat; - } - - /** - * Set a filter on the input table. - * @param filter the filter specification, which may be null - * @return this - * @throws IOException on all errors - */ - public HCatInputFormat setFilter(String filter) throws IOException { - // null filters are supported to simplify client code - if (filter != null) { - inputJobInfo = InputJobInfo.create( - inputJobInfo.getDatabaseName(), - inputJobInfo.getTableName(), - filter, - inputJobInfo.getProperties()); - try { - InitializeInput.setInput(conf, inputJobInfo); - } catch (Exception e) { - throw new IOException(e); - } - } - return this; - } - - /** - * Set properties for the input format. 
- * @param properties properties for the input specification - * @return this - * @throws IOException on all errors - */ - public HCatInputFormat setProperties(Properties properties) throws IOException { - Preconditions.checkNotNull(properties, "required argument 'properties' is null"); - inputJobInfo = InputJobInfo.create( - inputJobInfo.getDatabaseName(), - inputJobInfo.getTableName(), - inputJobInfo.getFilter(), - properties); - try { - InitializeInput.setInput(conf, inputJobInfo); - } catch (Exception e) { - throw new IOException(e); - } - return this; - } -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatMapRedUtil.java hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatMapRedUtil.java deleted file mode 100644 index 3446d25..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatMapRedUtil.java +++ /dev/null @@ -1,56 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.mapreduce; - -import org.apache.hadoop.hive.shims.ShimLoader; -import org.apache.hadoop.util.Progressable; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapred.JobContext; -import org.apache.hadoop.mapred.Reporter; -import org.apache.hadoop.mapred.TaskAttemptContext; -import org.apache.hadoop.mapred.TaskAttemptID; - -public class HCatMapRedUtil { - - public static TaskAttemptContext createTaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext context) { - return createTaskAttemptContext(new JobConf(context.getConfiguration()), - org.apache.hadoop.mapred.TaskAttemptID.forName(context.getTaskAttemptID().toString()), - Reporter.NULL); - } - - public static org.apache.hadoop.mapreduce.TaskAttemptContext createTaskAttemptContext(Configuration conf, org.apache.hadoop.mapreduce.TaskAttemptID id) { - return ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(conf,id); - } - - public static TaskAttemptContext createTaskAttemptContext(JobConf conf, TaskAttemptID id, Progressable progressable) { - return ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(conf, id, (Reporter) progressable); - } - - public static org.apache.hadoop.mapred.JobContext createJobContext(org.apache.hadoop.mapreduce.JobContext context) { - return createJobContext((JobConf)context.getConfiguration(), - context.getJobID(), - Reporter.NULL); - } - - public static JobContext createJobContext(JobConf conf, org.apache.hadoop.mapreduce.JobID id, Progressable progressable) { - return ShimLoader.getHadoopShims().getHCatShim().createJobContext(conf, id, (Reporter) progressable); - } -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatOutputFormat.java hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatOutputFormat.java deleted file mode 100644 index 717ba6a..0000000 --- 
hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatOutputFormat.java +++ /dev/null @@ -1,280 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.mapreduce; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.permission.FsPermission; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.metastore.api.Index; -import org.apache.hadoop.hive.metastore.api.StorageDescriptor; -import org.apache.hadoop.hive.ql.metadata.Table; -import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.OutputCommitter; -import org.apache.hadoop.mapreduce.RecordWriter; -import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.hadoop.security.Credentials; -import org.apache.hcatalog.common.ErrorType; -import org.apache.hcatalog.common.HCatConstants; -import 
org.apache.hcatalog.common.HCatException; -import org.apache.hcatalog.common.HCatUtil; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** The OutputFormat to use to write data to HCatalog. The key value is ignored and - * should be given as null. The value is the HCatRecord to write.*/ -public class HCatOutputFormat extends HCatBaseOutputFormat { - - static final private Logger LOG = LoggerFactory.getLogger(HCatOutputFormat.class); - - private static int maxDynamicPartitions; - private static boolean harRequested; - - /** - * @see org.apache.hcatalog.mapreduce.HCatOutputFormat#setOutput(org.apache.hadoop.conf.Configuration, Credentials, OutputJobInfo) - */ - public static void setOutput(Job job, OutputJobInfo outputJobInfo) throws IOException { - setOutput(job.getConfiguration(), job.getCredentials(), outputJobInfo); - } - - /** - * Set the information about the output to write for the job. This queries the metadata server - * to find the StorageHandler to use for the table. It throws an error if the - * partition is already published. 
- * @param conf the Configuration object - * @param credentials the Credentials object - * @param outputJobInfo the table output information for the job - * @throws IOException the exception in communicating with the metadata server - */ - @SuppressWarnings("unchecked") - public static void setOutput(Configuration conf, Credentials credentials, - OutputJobInfo outputJobInfo) throws IOException { - HiveMetaStoreClient client = null; - - try { - - HiveConf hiveConf = HCatUtil.getHiveConf(conf); - client = HCatUtil.getHiveClient(hiveConf); - Table table = HCatUtil.getTable(client, outputJobInfo.getDatabaseName(), - outputJobInfo.getTableName()); - - List indexList = client.listIndexNames(outputJobInfo.getDatabaseName(), outputJobInfo.getTableName(), Short.MAX_VALUE); - - for (String indexName : indexList) { - Index index = client.getIndex(outputJobInfo.getDatabaseName(), outputJobInfo.getTableName(), indexName); - if (!index.isDeferredRebuild()) { - throw new HCatException(ErrorType.ERROR_NOT_SUPPORTED, "Store into a table with an automatic index from Pig/Mapreduce is not supported"); - } - } - StorageDescriptor sd = table.getTTable().getSd(); - - if (sd.isCompressed()) { - throw new HCatException(ErrorType.ERROR_NOT_SUPPORTED, "Store into a compressed partition from Pig/Mapreduce is not supported"); - } - - if (sd.getBucketCols() != null && !sd.getBucketCols().isEmpty()) { - throw new HCatException(ErrorType.ERROR_NOT_SUPPORTED, "Store into a partition with bucket definition from Pig/Mapreduce is not supported"); - } - - if (sd.getSortCols() != null && !sd.getSortCols().isEmpty()) { - throw new HCatException(ErrorType.ERROR_NOT_SUPPORTED, "Store into a partition with sorted column definition from Pig/Mapreduce is not supported"); - } - - if (table.getTTable().getPartitionKeysSize() == 0) { - if ((outputJobInfo.getPartitionValues() != null) && (!outputJobInfo.getPartitionValues().isEmpty())) { - // attempt made to save partition values in non-partitioned table - throw 
error. - throw new HCatException(ErrorType.ERROR_INVALID_PARTITION_VALUES, - "Partition values specified for non-partitioned table"); - } - // non-partitioned table - outputJobInfo.setPartitionValues(new HashMap()); - - } else { - // partitioned table, we expect partition values - // convert user specified map to have lower case key names - Map valueMap = new HashMap(); - if (outputJobInfo.getPartitionValues() != null) { - for (Map.Entry entry : outputJobInfo.getPartitionValues().entrySet()) { - valueMap.put(entry.getKey().toLowerCase(), entry.getValue()); - } - } - - if ((outputJobInfo.getPartitionValues() == null) - || (outputJobInfo.getPartitionValues().size() < table.getTTable().getPartitionKeysSize())) { - // dynamic partition usecase - partition values were null, or not all were specified - // need to figure out which keys are not specified. - List dynamicPartitioningKeys = new ArrayList(); - boolean firstItem = true; - for (FieldSchema fs : table.getPartitionKeys()) { - if (!valueMap.containsKey(fs.getName().toLowerCase())) { - dynamicPartitioningKeys.add(fs.getName().toLowerCase()); - } - } - - if (valueMap.size() + dynamicPartitioningKeys.size() != table.getTTable().getPartitionKeysSize()) { - // If this isn't equal, then bogus key values have been inserted, error out. 
- throw new HCatException(ErrorType.ERROR_INVALID_PARTITION_VALUES, "Invalid partition keys specified"); - } - - outputJobInfo.setDynamicPartitioningKeys(dynamicPartitioningKeys); - String dynHash; - if ((dynHash = conf.get(HCatConstants.HCAT_DYNAMIC_PTN_JOBID)) == null) { - dynHash = String.valueOf(Math.random()); -// LOG.info("New dynHash : ["+dynHash+"]"); -// }else{ -// LOG.info("Old dynHash : ["+dynHash+"]"); - } - conf.set(HCatConstants.HCAT_DYNAMIC_PTN_JOBID, dynHash); - - } - - outputJobInfo.setPartitionValues(valueMap); - } - - // To get around hbase failure on single node, see BUG-4383 - conf.set("dfs.client.read.shortcircuit", "false"); - HCatSchema tableSchema = HCatUtil.extractSchema(table); - StorerInfo storerInfo = - InternalUtil.extractStorerInfo(table.getTTable().getSd(), table.getParameters()); - - List partitionCols = new ArrayList(); - for (FieldSchema schema : table.getPartitionKeys()) { - partitionCols.add(schema.getName()); - } - - HCatStorageHandler storageHandler = HCatUtil.getStorageHandler(conf, storerInfo); - - //Serialize the output info into the configuration - outputJobInfo.setTableInfo(HCatTableInfo.valueOf(table.getTTable())); - outputJobInfo.setOutputSchema(tableSchema); - harRequested = getHarRequested(hiveConf); - outputJobInfo.setHarRequested(harRequested); - maxDynamicPartitions = getMaxDynamicPartitions(hiveConf); - outputJobInfo.setMaximumDynamicPartitions(maxDynamicPartitions); - - HCatUtil.configureOutputStorageHandler(storageHandler, conf, outputJobInfo); - - Path tblPath = new Path(table.getTTable().getSd().getLocation()); - - /* Set the umask in conf such that files/dirs get created with table-dir - * permissions. Following three assumptions are made: - * 1. Actual files/dirs creation is done by RecordWriter of underlying - * output format. It is assumed that they use default permissions while creation. - * 2. Default Permissions = FsPermission.getDefault() = 777. - * 3. UMask is honored by underlying filesystem. 
- */ - - FsPermission.setUMask(conf, FsPermission.getDefault().applyUMask( - tblPath.getFileSystem(conf).getFileStatus(tblPath).getPermission())); - - if (Security.getInstance().isSecurityEnabled()) { - Security.getInstance().handleSecurity(credentials, outputJobInfo, client, conf, harRequested); - } - } catch (Exception e) { - if (e instanceof HCatException) { - throw (HCatException) e; - } else { - throw new HCatException(ErrorType.ERROR_SET_OUTPUT, e); - } - } finally { - HCatUtil.closeHiveClientQuietly(client); - } - } - - /** - * @see org.apache.hcatalog.mapreduce.HCatOutputFormat#setSchema(org.apache.hadoop.conf.Configuration, org.apache.hcatalog.data.schema.HCatSchema) - */ - public static void setSchema(final Job job, final HCatSchema schema) throws IOException { - setSchema(job.getConfiguration(), schema); - } - - /** - * Set the schema for the data being written out to the partition. The - * table schema is used by default for the partition if this is not called. - * @param conf the job Configuration object - * @param schema the schema for the data - * @throws IOException - */ - public static void setSchema(final Configuration conf, final HCatSchema schema) throws IOException { - OutputJobInfo jobInfo = getJobInfo(conf); - Map partMap = jobInfo.getPartitionValues(); - setPartDetails(jobInfo, schema, partMap); - conf.set(HCatConstants.HCAT_KEY_OUTPUT_INFO, HCatUtil.serialize(jobInfo)); - } - - /** - * Get the record writer for the job. This uses the StorageHandler's default - * OutputFormat to get the record writer. - * @param context the information about the current task - * @return a RecordWriter to write the output for the job - * @throws IOException - * @throws InterruptedException - */ - @Override - public RecordWriter, HCatRecord> - getRecordWriter(TaskAttemptContext context) - throws IOException, InterruptedException { - return getOutputFormat(context).getRecordWriter(context); - } - - - /** - * Get the output committer for this output format. 
This is responsible - * for ensuring the output is committed correctly. - * @param context the task context - * @return an output committer - * @throws IOException - * @throws InterruptedException - */ - @Override - public OutputCommitter getOutputCommitter(TaskAttemptContext context - ) throws IOException, InterruptedException { - return getOutputFormat(context).getOutputCommitter(context); - } - - private static int getMaxDynamicPartitions(HiveConf hConf) { - // by default the bounds checking for maximum number of - // dynamic partitions is disabled (-1) - int maxDynamicPartitions = -1; - - if (HCatConstants.HCAT_IS_DYNAMIC_MAX_PTN_CHECK_ENABLED) { - maxDynamicPartitions = hConf.getIntVar( - HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTS); - } - - return maxDynamicPartitions; - } - - private static boolean getHarRequested(HiveConf hConf) { - return hConf.getBoolVar(HiveConf.ConfVars.HIVEARCHIVEENABLED); - } - -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatRecordReader.java hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatRecordReader.java deleted file mode 100644 index ac3753f..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatRecordReader.java +++ /dev/null @@ -1,285 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.mapreduce; - -import java.io.IOException; -import java.util.Map; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.serde2.Deserializer; -import org.apache.hadoop.hive.serde2.SerDeException; -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapreduce.RecordReader; -import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.hadoop.util.ReflectionUtils; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.common.HCatUtil; -import org.apache.hcatalog.data.DefaultHCatRecord; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.hcatalog.data.LazyHCatRecord; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** The HCat wrapper for the underlying RecordReader, - * this ensures that the initialize on - * the underlying record reader is done with the underlying split, - * not with HCatSplit. - */ -class HCatRecordReader extends RecordReader { - - private static final Logger LOG = LoggerFactory.getLogger(HCatRecordReader.class); - - private InputErrorTracker errorTracker; - - WritableComparable currentKey; - Writable currentValue; - HCatRecord currentHCatRecord; - - /** The underlying record reader to delegate to. */ - private org.apache.hadoop.mapred.RecordReader baseRecordReader; - - /** The storage handler used */ - private final HCatStorageHandler storageHandler; - - private Deserializer deserializer; - - private Map valuesNotInDataCols; - - private HCatSchema outputSchema = null; - private HCatSchema dataSchema = null; - - /** - * Instantiates a new hcat record reader. 
- */ - public HCatRecordReader(HCatStorageHandler storageHandler, - Map valuesNotInDataCols) { - this.storageHandler = storageHandler; - this.valuesNotInDataCols = valuesNotInDataCols; - } - - /* (non-Javadoc) - * @see org.apache.hadoop.mapreduce.RecordReader#initialize( - * org.apache.hadoop.mapreduce.InputSplit, - * org.apache.hadoop.mapreduce.TaskAttemptContext) - */ - @Override - public void initialize(org.apache.hadoop.mapreduce.InputSplit split, - TaskAttemptContext taskContext) throws IOException, InterruptedException { - - HCatSplit hcatSplit = InternalUtil.castToHCatSplit(split); - - baseRecordReader = createBaseRecordReader(hcatSplit, storageHandler, taskContext); - createDeserializer(hcatSplit, storageHandler, taskContext); - - // Pull the output schema out of the TaskAttemptContext - outputSchema = (HCatSchema) HCatUtil.deserialize( - taskContext.getConfiguration().get(HCatConstants.HCAT_KEY_OUTPUT_SCHEMA)); - - if (outputSchema == null) { - outputSchema = hcatSplit.getTableSchema(); - } - - // Pull the table schema out of the Split info - // TODO This should be passed in the TaskAttemptContext instead - dataSchema = hcatSplit.getDataSchema(); - - errorTracker = new InputErrorTracker(taskContext.getConfiguration()); - } - - private org.apache.hadoop.mapred.RecordReader createBaseRecordReader(HCatSplit hcatSplit, - HCatStorageHandler storageHandler, TaskAttemptContext taskContext) throws IOException { - - JobConf jobConf = HCatUtil.getJobConfFromContext(taskContext); - HCatUtil.copyJobPropertiesToJobConf(hcatSplit.getPartitionInfo().getJobProperties(), jobConf); - org.apache.hadoop.mapred.InputFormat inputFormat = - HCatInputFormat.getMapRedInputFormat(jobConf, storageHandler.getInputFormatClass()); - return inputFormat.getRecordReader(hcatSplit.getBaseSplit(), jobConf, - InternalUtil.createReporter(taskContext)); - } - - private void createDeserializer(HCatSplit hcatSplit, HCatStorageHandler storageHandler, - TaskAttemptContext taskContext) throws 
IOException { - - deserializer = ReflectionUtils.newInstance(storageHandler.getSerDeClass(), - taskContext.getConfiguration()); - - try { - InternalUtil.initializeDeserializer(deserializer, storageHandler.getConf(), - hcatSplit.getPartitionInfo().getTableInfo(), - hcatSplit.getPartitionInfo().getPartitionSchema()); - } catch (SerDeException e) { - throw new IOException("Failed initializing deserializer " - + storageHandler.getSerDeClass().getName(), e); - } - } - - /* (non-Javadoc) - * @see org.apache.hadoop.mapreduce.RecordReader#getCurrentKey() - */ - @Override - public WritableComparable getCurrentKey() - throws IOException, InterruptedException { - return currentKey; - } - - /* (non-Javadoc) - * @see org.apache.hadoop.mapreduce.RecordReader#getCurrentValue() - */ - @Override - public HCatRecord getCurrentValue() throws IOException, InterruptedException { - return currentHCatRecord; - } - - /* (non-Javadoc) - * @see org.apache.hadoop.mapreduce.RecordReader#getProgress() - */ - @Override - public float getProgress() { - try { - return baseRecordReader.getProgress(); - } catch (IOException e) { - LOG.warn("Exception in HCatRecord reader", e); - } - return 0.0f; // errored - } - - /** - * Check if the wrapped RecordReader has another record, and if so convert it into an - * HCatRecord. We both check for records and convert here so a configurable percent of - * bad records can be tolerated. 
- * - * @return if there is a next record - * @throws IOException on error - * @throws InterruptedException on error - */ - @Override - public boolean nextKeyValue() throws IOException, InterruptedException { - if (currentKey == null) { - currentKey = baseRecordReader.createKey(); - currentValue = baseRecordReader.createValue(); - } - - while (baseRecordReader.next(currentKey, currentValue)) { - HCatRecord r = null; - Throwable t = null; - - errorTracker.incRecords(); - - try { - Object o = deserializer.deserialize(currentValue); - r = new LazyHCatRecord(o, deserializer.getObjectInspector()); - } catch (Throwable throwable) { - t = throwable; - } - - if (r == null) { - errorTracker.incErrors(t); - continue; - } - - DefaultHCatRecord dr = new DefaultHCatRecord(outputSchema.size()); - int i = 0; - for (String fieldName : outputSchema.getFieldNames()) { - if (dataSchema.getPosition(fieldName) != null) { - dr.set(i, r.get(fieldName, dataSchema)); - } else { - dr.set(i, valuesNotInDataCols.get(fieldName)); - } - i++; - } - - currentHCatRecord = dr; - return true; - } - - return false; - } - - /* (non-Javadoc) - * @see org.apache.hadoop.mapreduce.RecordReader#close() - */ - @Override - public void close() throws IOException { - baseRecordReader.close(); - } - - /** - * Tracks number of of errors in input and throws a Runtime exception - * if the rate of errors crosses a limit. - *
- * The intention is to skip over very rare file corruption or incorrect - * input, but catch programmer errors (incorrect format, or incorrect - * deserializers etc). - * - * This class was largely copied from Elephant-Bird (thanks @rangadi!) - * https://github.com/kevinweil/elephant-bird/blob/master/core/src/main/java/com/twitter/elephantbird/mapreduce/input/LzoRecordReader.java - */ - static class InputErrorTracker { - long numRecords; - long numErrors; - - double errorThreshold; // max fraction of errors allowed - long minErrors; // throw error only after this many errors - - InputErrorTracker(Configuration conf) { - errorThreshold = conf.getFloat(HCatConstants.HCAT_INPUT_BAD_RECORD_THRESHOLD_KEY, - HCatConstants.HCAT_INPUT_BAD_RECORD_THRESHOLD_DEFAULT); - minErrors = conf.getLong(HCatConstants.HCAT_INPUT_BAD_RECORD_MIN_KEY, - HCatConstants.HCAT_INPUT_BAD_RECORD_MIN_DEFAULT); - numRecords = 0; - numErrors = 0; - } - - void incRecords() { - numRecords++; - } - - void incErrors(Throwable cause) { - numErrors++; - if (numErrors > numRecords) { - // incorrect use of this class - throw new RuntimeException("Forgot to invoke incRecords()?"); - } - - if (cause == null) { - cause = new Exception("Unknown error"); - } - - if (errorThreshold <= 0) { // no errors are tolerated - throw new RuntimeException("error while reading input records", cause); - } - - LOG.warn("Error while reading an input record (" - + numErrors + " out of " + numRecords + " so far ): ", cause); - - double errRate = numErrors / (double) numRecords; - - // will always excuse the first error. We can decide if single - // error crosses threshold inside close() if we want to. 
- if (numErrors >= minErrors && errRate > errorThreshold) { - LOG.error(numErrors + " out of " + numRecords - + " crosses configured threshold (" + errorThreshold + ")"); - throw new RuntimeException("error rate while reading input records crossed threshold", cause); - } - } - } -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatSplit.java hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatSplit.java deleted file mode 100644 index 3773b73..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatSplit.java +++ /dev/null @@ -1,186 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.hcatalog.mapreduce; - -import java.io.DataInput; -import java.io.DataOutput; -import java.io.IOException; -import java.lang.reflect.Constructor; - -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.io.WritableUtils; -import org.apache.hadoop.mapreduce.InputSplit; -import org.apache.hcatalog.common.HCatUtil; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** The HCatSplit wrapper around the InputSplit returned by the underlying InputFormat */ -public class HCatSplit extends InputSplit - implements Writable, org.apache.hadoop.mapred.InputSplit { - - private static final Logger LOG = LoggerFactory.getLogger(HCatSplit.class); - /** The partition info for the split. */ - private PartInfo partitionInfo; - - /** The split returned by the underlying InputFormat split. */ - private org.apache.hadoop.mapred.InputSplit baseMapRedSplit; - - /** The schema for the HCatTable */ - private HCatSchema tableSchema; - - private HiveConf hiveConf; - - /** - * Instantiates a new hcat split. - */ - public HCatSplit() { - } - - /** - * Instantiates a new hcat split. - * - * @param partitionInfo the partition info - * @param baseMapRedSplit the base mapred split - * @param tableSchema the table level schema - */ - public HCatSplit(PartInfo partitionInfo, - org.apache.hadoop.mapred.InputSplit baseMapRedSplit, - HCatSchema tableSchema) { - - this.partitionInfo = partitionInfo; - // dataSchema can be obtained from partitionInfo.getPartitionSchema() - this.baseMapRedSplit = baseMapRedSplit; - this.tableSchema = tableSchema; - } - - /** - * Gets the partition info. - * @return the partitionInfo - */ - public PartInfo getPartitionInfo() { - return partitionInfo; - } - - /** - * Gets the underlying InputSplit. 
- * @return the baseMapRedSplit - */ - public org.apache.hadoop.mapred.InputSplit getBaseSplit() { - return baseMapRedSplit; - } - - /** - * Gets the data schema. - * @return the table schema - */ - public HCatSchema getDataSchema() { - return this.partitionInfo.getPartitionSchema(); - } - - /** - * Gets the table schema. - * @return the table schema - */ - public HCatSchema getTableSchema() { - return this.tableSchema; - } - - /* (non-Javadoc) - * @see org.apache.hadoop.mapreduce.InputSplit#getLength() - */ - @Override - public long getLength() { - try { - return baseMapRedSplit.getLength(); - } catch (IOException e) { - LOG.warn("Exception in HCatSplit", e); - } - return 0; // we errored - } - - /* (non-Javadoc) - * @see org.apache.hadoop.mapreduce.InputSplit#getLocations() - */ - @Override - public String[] getLocations() { - try { - return baseMapRedSplit.getLocations(); - } catch (IOException e) { - LOG.warn("Exception in HCatSplit", e); - } - return new String[0]; // we errored - } - - /* (non-Javadoc) - * @see org.apache.hadoop.io.Writable#readFields(java.io.DataInput) - */ - @SuppressWarnings("unchecked") - @Override - public void readFields(DataInput input) throws IOException { - String partitionInfoString = WritableUtils.readString(input); - partitionInfo = (PartInfo) HCatUtil.deserialize(partitionInfoString); - - String baseSplitClassName = WritableUtils.readString(input); - org.apache.hadoop.mapred.InputSplit split; - try { - Class splitClass = - (Class) Class.forName(baseSplitClassName); - - //Class.forName().newInstance() does not work if the underlying - //InputSplit has package visibility - Constructor - constructor = - splitClass.getDeclaredConstructor(new Class[]{}); - constructor.setAccessible(true); - - split = constructor.newInstance(); - // read baseSplit from input - ((Writable) split).readFields(input); - this.baseMapRedSplit = split; - } catch (Exception e) { - throw new IOException("Exception from " + baseSplitClassName, e); - } - - String 
tableSchemaString = WritableUtils.readString(input); - tableSchema = (HCatSchema) HCatUtil.deserialize(tableSchemaString); - } - - /* (non-Javadoc) - * @see org.apache.hadoop.io.Writable#write(java.io.DataOutput) - */ - @Override - public void write(DataOutput output) throws IOException { - String partitionInfoString = HCatUtil.serialize(partitionInfo); - - // write partitionInfo into output - WritableUtils.writeString(output, partitionInfoString); - - WritableUtils.writeString(output, baseMapRedSplit.getClass().getName()); - Writable baseSplitWritable = (Writable) baseMapRedSplit; - //write baseSplit into output - baseSplitWritable.write(output); - - //write the table schema into output - String tableSchemaString = HCatUtil.serialize(tableSchema); - WritableUtils.writeString(output, tableSchemaString); - } - -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatStorageHandler.java hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatStorageHandler.java deleted file mode 100644 index 2f00ba2..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatStorageHandler.java +++ /dev/null @@ -1,120 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.mapreduce; - -import java.util.Map; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.ql.metadata.DefaultStorageHandler; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.plan.TableDesc; -import org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider; -import org.apache.hadoop.mapred.OutputFormat; - -/** - * The abstract Class HCatStorageHandler would server as the base class for all - * the storage handlers required for non-native tables in HCatalog. - */ -public abstract class HCatStorageHandler extends DefaultStorageHandler { - - //TODO move this to HiveStorageHandler - - /** - * This method is called to allow the StorageHandlers the chance - * to populate the JobContext.getConfiguration() with properties that - * maybe be needed by the handler's bundled artifacts (ie InputFormat, SerDe, etc). - * Key value pairs passed into jobProperties is guaranteed to be set in the job's - * configuration object. User's can retrieve "context" information from tableDesc. - * User's should avoid mutating tableDesc and only make changes in jobProperties. - * This method is expected to be idempotent such that a job called with the - * same tableDesc values should return the same key-value pairs in jobProperties. - * Any external state set by this method should remain the same if this method is - * called again. It is up to the user to determine how best guarantee this invariant. - * - * This method in particular is to create a configuration for input. 
- * @param tableDesc - * @param jobProperties - */ - public abstract void configureInputJobProperties(TableDesc tableDesc, Map jobProperties); - - //TODO move this to HiveStorageHandler - - /** - * This method is called to allow the StorageHandlers the chance - * to populate the JobContext.getConfiguration() with properties that - * maybe be needed by the handler's bundled artifacts (ie InputFormat, SerDe, etc). - * Key value pairs passed into jobProperties is guaranteed to be set in the job's - * configuration object. User's can retrieve "context" information from tableDesc. - * User's should avoid mutating tableDesc and only make changes in jobProperties. - * This method is expected to be idempotent such that a job called with the - * same tableDesc values should return the same key-value pairs in jobProperties. - * Any external state set by this method should remain the same if this method is - * called again. It is up to the user to determine how best guarantee this invariant. - * - * This method in particular is to create a configuration for output. - * @param tableDesc - * @param jobProperties - */ - public abstract void configureOutputJobProperties(TableDesc tableDesc, Map jobProperties); - - /** - * - * - * @return authorization provider - * @throws HiveException - */ - public abstract HiveAuthorizationProvider getAuthorizationProvider() - throws HiveException; - - /* - * (non-Javadoc) - * - * @see org.apache.hadoop.hive.ql.metadata.HiveStorageHandler# - * configureTableJobProperties(org.apache.hadoop.hive.ql.plan.TableDesc, - * java.util.Map) - */ - @Override - @Deprecated - public final void configureTableJobProperties(TableDesc tableDesc, - Map jobProperties) { - } - - /* - * (non-Javadoc) - * - * @see org.apache.hadoop.conf.Configurable#getConf() - */ - @Override - public abstract Configuration getConf(); - - /* - * (non-Javadoc) - * - * @see org.apache.hadoop.conf.Configurable#setConf(org.apache.hadoop.conf. 
- * Configuration) - */ - @Override - public abstract void setConf(Configuration conf); - - OutputFormatContainer getOutputFormatContainer(OutputFormat outputFormat) { - return new DefaultOutputFormatContainer(outputFormat); - } - -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatTableInfo.java hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatTableInfo.java deleted file mode 100644 index a3984cc..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/HCatTableInfo.java +++ /dev/null @@ -1,187 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.mapreduce; - -import java.io.IOException; -import java.io.Serializable; - -import org.apache.hadoop.hive.metastore.MetaStoreUtils; -import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.hcatalog.common.HCatUtil; -import org.apache.hcatalog.data.schema.HCatSchema; - -/** - * - * HCatTableInfo - class to communicate table information to {@link HCatInputFormat} - * and {@link HCatOutputFormat} - * - */ -public class HCatTableInfo implements Serializable { - - - private static final long serialVersionUID = 1L; - - /** The db and table names */ - private final String databaseName; - private final String tableName; - - /** The table schema. */ - private final HCatSchema dataColumns; - private final HCatSchema partitionColumns; - - /** The table being written to */ - private final Table table; - - /** The storer info */ - private StorerInfo storerInfo; - - /** - * Initializes a new HCatTableInfo instance to be used with {@link HCatInputFormat} - * for reading data from a table. - * work with hadoop security, the kerberos principal name of the server - else null - * The principal name should be of the form: - * /_HOST@ like "hcat/_HOST@myrealm.com" - * The special string _HOST will be replaced automatically with the correct host name - * @param databaseName the db name - * @param tableName the table name - * @param dataColumns schema of columns which contain data - * @param partitionColumns schema of partition columns - * @param storerInfo information about storage descriptor - * @param table hive metastore table class - */ - HCatTableInfo( - String databaseName, - String tableName, - HCatSchema dataColumns, - HCatSchema partitionColumns, - StorerInfo storerInfo, - Table table) { - this.databaseName = (databaseName == null) ? 
MetaStoreUtils.DEFAULT_DATABASE_NAME : databaseName; - this.tableName = tableName; - this.dataColumns = dataColumns; - this.table = table; - this.storerInfo = storerInfo; - this.partitionColumns = partitionColumns; - } - - /** - * Gets the value of databaseName - * @return the databaseName - */ - public String getDatabaseName() { - return databaseName; - } - - /** - * Gets the value of tableName - * @return the tableName - */ - public String getTableName() { - return tableName; - } - - /** - * @return return schema of data columns as defined in meta store - */ - public HCatSchema getDataColumns() { - return dataColumns; - } - - /** - * @return schema of partition columns - */ - public HCatSchema getPartitionColumns() { - return partitionColumns; - } - - /** - * @return the storerInfo - */ - public StorerInfo getStorerInfo() { - return storerInfo; - } - - public String getTableLocation() { - return table.getSd().getLocation(); - } - - /** - * minimize dependency on hive classes so this is package private - * this should eventually no longer be used - * @return hive metastore representation of table - */ - Table getTable() { - return table; - } - - /** - * create an HCatTableInfo instance from the supplied Hive Table instance - * @param table to create an instance from - * @return HCatTableInfo - * @throws IOException - */ - static HCatTableInfo valueOf(Table table) throws IOException { - // Explicitly use {@link org.apache.hadoop.hive.ql.metadata.Table} when getting the schema, - // but store @{link org.apache.hadoop.hive.metastore.api.Table} as this class is serialized - // into the job conf. 
- org.apache.hadoop.hive.ql.metadata.Table mTable = - new org.apache.hadoop.hive.ql.metadata.Table(table); - HCatSchema schema = HCatUtil.extractSchema(mTable); - StorerInfo storerInfo = - InternalUtil.extractStorerInfo(table.getSd(), table.getParameters()); - HCatSchema partitionColumns = HCatUtil.getPartitionColumns(mTable); - return new HCatTableInfo(table.getDbName(), table.getTableName(), schema, - partitionColumns, storerInfo, table); - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - - HCatTableInfo tableInfo = (HCatTableInfo) o; - - if (dataColumns != null ? !dataColumns.equals(tableInfo.dataColumns) : tableInfo.dataColumns != null) - return false; - if (databaseName != null ? !databaseName.equals(tableInfo.databaseName) : tableInfo.databaseName != null) - return false; - if (partitionColumns != null ? !partitionColumns.equals(tableInfo.partitionColumns) : tableInfo.partitionColumns != null) - return false; - if (storerInfo != null ? !storerInfo.equals(tableInfo.storerInfo) : tableInfo.storerInfo != null) return false; - if (table != null ? !table.equals(tableInfo.table) : tableInfo.table != null) return false; - if (tableName != null ? !tableName.equals(tableInfo.tableName) : tableInfo.tableName != null) return false; - - return true; - } - - - @Override - public int hashCode() { - int result = databaseName != null ? databaseName.hashCode() : 0; - result = 31 * result + (tableName != null ? tableName.hashCode() : 0); - result = 31 * result + (dataColumns != null ? dataColumns.hashCode() : 0); - result = 31 * result + (partitionColumns != null ? partitionColumns.hashCode() : 0); - result = 31 * result + (table != null ? table.hashCode() : 0); - result = 31 * result + (storerInfo != null ? 
storerInfo.hashCode() : 0); - return result; - } - -} - diff --git hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/InitializeInput.java hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/InitializeInput.java deleted file mode 100644 index 02ec37f..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/InitializeInput.java +++ /dev/null @@ -1,171 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.hcatalog.mapreduce; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Properties; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; -import org.apache.hadoop.hive.metastore.api.Partition; -import org.apache.hadoop.hive.metastore.api.StorageDescriptor; -import org.apache.hadoop.hive.ql.metadata.Table; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hcatalog.common.ErrorType; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.common.HCatException; -import org.apache.hcatalog.common.HCatUtil; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * The Class which handles querying the metadata server using the MetaStoreClient. The list of - * partitions matching the partition filter is fetched from the server and the information is - * serialized and written into the JobContext configuration. The inputInfo is also updated with - * info required in the client process context. - */ -class InitializeInput { - - private static final Logger LOG = LoggerFactory.getLogger(InitializeInput.class); - - /** - * @see org.apache.hcatalog.mapreduce.InitializeInput#setInput(org.apache.hadoop.conf.Configuration, InputJobInfo) - */ - public static void setInput(Job job, InputJobInfo theirInputJobInfo) throws Exception { - setInput(job.getConfiguration(), theirInputJobInfo); - } - - /** - * Set the input to use for the Job. This queries the metadata server with the specified - * partition predicates, gets the matching partitions, and puts the information in the job - * configuration object. - * - * To ensure a known InputJobInfo state, only the database name, table name, filter, and - * properties are preserved. 
All other modification from the given InputJobInfo are discarded. - * - * After calling setInput, InputJobInfo can be retrieved from the job configuration as follows: - * {code} - * InputJobInfo inputInfo = (InputJobInfo) HCatUtil.deserialize( - * job.getConfiguration().get(HCatConstants.HCAT_KEY_JOB_INFO)); - * {code} - * - * @param conf the job Configuration object - * @param theirInputJobInfo information on the Input to read - * @throws Exception - */ - public static void setInput(Configuration conf, - InputJobInfo theirInputJobInfo) throws Exception { - InputJobInfo inputJobInfo = InputJobInfo.create( - theirInputJobInfo.getDatabaseName(), - theirInputJobInfo.getTableName(), - theirInputJobInfo.getFilter(), - theirInputJobInfo.getProperties()); - conf.set( - HCatConstants.HCAT_KEY_JOB_INFO, - HCatUtil.serialize(getInputJobInfo(conf, inputJobInfo, null))); - } - - /** - * Returns the given InputJobInfo after populating with data queried from the metadata service. - */ - private static InputJobInfo getInputJobInfo( - Configuration conf, InputJobInfo inputJobInfo, String locationFilter) throws Exception { - HiveMetaStoreClient client = null; - HiveConf hiveConf = null; - try { - if (conf != null) { - hiveConf = HCatUtil.getHiveConf(conf); - } else { - hiveConf = new HiveConf(HCatInputFormat.class); - } - client = HCatUtil.getHiveClient(hiveConf); - Table table = HCatUtil.getTable(client, inputJobInfo.getDatabaseName(), - inputJobInfo.getTableName()); - - List partInfoList = new ArrayList(); - - inputJobInfo.setTableInfo(HCatTableInfo.valueOf(table.getTTable())); - if (table.getPartitionKeys().size() != 0) { - //Partitioned table - List parts = client.listPartitionsByFilter(inputJobInfo.getDatabaseName(), - inputJobInfo.getTableName(), - inputJobInfo.getFilter(), - (short) -1); - - // Default to 100,000 partitions if hive.metastore.maxpartition is not defined - int maxPart = hiveConf.getInt("hcat.metastore.maxpartitions", 100000); - if (parts != null && 
parts.size() > maxPart) { - throw new HCatException(ErrorType.ERROR_EXCEED_MAXPART, "total number of partitions is " + parts.size()); - } - - // populate partition info - for (Partition ptn : parts) { - HCatSchema schema = HCatUtil.extractSchema( - new org.apache.hadoop.hive.ql.metadata.Partition(table, ptn)); - PartInfo partInfo = extractPartInfo(schema, ptn.getSd(), - ptn.getParameters(), conf, inputJobInfo); - partInfo.setPartitionValues(InternalUtil.createPtnKeyValueMap(table, ptn)); - partInfoList.add(partInfo); - } - - } else { - //Non partitioned table - HCatSchema schema = HCatUtil.extractSchema(table); - PartInfo partInfo = extractPartInfo(schema, table.getTTable().getSd(), - table.getParameters(), conf, inputJobInfo); - partInfo.setPartitionValues(new HashMap()); - partInfoList.add(partInfo); - } - inputJobInfo.setPartitions(partInfoList); - - return inputJobInfo; - } finally { - HCatUtil.closeHiveClientQuietly(client); - } - - } - - private static PartInfo extractPartInfo(HCatSchema schema, StorageDescriptor sd, - Map parameters, Configuration conf, - InputJobInfo inputJobInfo) throws IOException { - - StorerInfo storerInfo = InternalUtil.extractStorerInfo(sd, parameters); - - Properties hcatProperties = new Properties(); - HCatStorageHandler storageHandler = HCatUtil.getStorageHandler(conf, storerInfo); - - // copy the properties from storageHandler to jobProperties - Map jobProperties = HCatUtil.getInputJobProperties(storageHandler, inputJobInfo); - - for (String key : parameters.keySet()) { - hcatProperties.put(key, parameters.get(key)); - } - // FIXME - // Bloating partinfo with inputJobInfo is not good - return new PartInfo(schema, storageHandler, sd.getLocation(), - hcatProperties, jobProperties, inputJobInfo.getTableInfo()); - } - -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/InputJobInfo.java hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/InputJobInfo.java deleted file mode 100644 index 4bcec34..0000000 --- 
hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/InputJobInfo.java +++ /dev/null @@ -1,201 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.mapreduce; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.hive.metastore.MetaStoreUtils; - -import java.io.IOException; -import java.io.ObjectInputStream; -import java.io.ObjectOutputStream; -import java.io.Serializable; -import java.util.List; -import java.util.Properties; -import java.util.zip.Deflater; -import java.util.zip.DeflaterOutputStream; -import java.util.zip.InflaterInputStream; - -/** - * Container for metadata read from the metadata server. - * Prior to release 0.5, InputJobInfo was a key part of the public API, exposed directly - * to end-users as an argument to - * {@link HCatInputFormat#setInput(org.apache.hadoop.mapreduce.Job, InputJobInfo)}. - * Going forward, we plan on treating InputJobInfo as an implementation detail and no longer - * expose to end-users. Should you have a need to use InputJobInfo outside HCatalog itself, - * please contact the developer mailing list before depending on this class. 
- */ -@InterfaceAudience.Private -@InterfaceStability.Evolving -public class InputJobInfo implements Serializable { - - /** The serialization version */ - private static final long serialVersionUID = 1L; - - /** The db and table names. */ - private final String databaseName; - private final String tableName; - - /** meta information of the table to be read from */ - private HCatTableInfo tableInfo; - - /** The partition filter */ - private String filter; - - /** The list of partitions matching the filter. */ - transient private List partitions; - - /** implementation specific job properties */ - private Properties properties; - - /** - * Initializes a new InputJobInfo - * for reading data from a table. - * @param databaseName the db name - * @param tableName the table name - * @param filter the partition filter - * @param properties implementation specific job properties - */ - public static InputJobInfo create(String databaseName, - String tableName, - String filter, - Properties properties) { - return new InputJobInfo(databaseName, tableName, filter, properties); - } - - /** - * Initializes a new InputJobInfo - * for reading data from a table. - * @param databaseName the db name - * @param tableName the table name - * @param filter the partition filter - */ - @Deprecated - public static InputJobInfo create(String databaseName, - String tableName, - String filter) { - return create(databaseName, tableName, filter, null); - } - - - private InputJobInfo(String databaseName, - String tableName, - String filter, - Properties properties) { - this.databaseName = (databaseName == null) ? - MetaStoreUtils.DEFAULT_DATABASE_NAME : databaseName; - this.tableName = tableName; - this.filter = filter; - this.properties = properties == null ? 
new Properties() : properties; - } - - /** - * Gets the value of databaseName - * @return the databaseName - */ - public String getDatabaseName() { - return databaseName; - } - - /** - * Gets the value of tableName - * @return the tableName - */ - public String getTableName() { - return tableName; - } - - /** - * Gets the table's meta information - * @return the HCatTableInfo - */ - public HCatTableInfo getTableInfo() { - return tableInfo; - } - - /** - * set the tablInfo instance - * this should be the same instance - * determined by this object's DatabaseName and TableName - * @param tableInfo - */ - void setTableInfo(HCatTableInfo tableInfo) { - this.tableInfo = tableInfo; - } - - /** - * Gets the value of partition filter - * @return the filter string - */ - public String getFilter() { - return filter; - } - - /** - * @return partition info - */ - public List getPartitions() { - return partitions; - } - - /** - * @return partition info list - */ - void setPartitions(List partitions) { - this.partitions = partitions; - } - - /** - * Set/Get Property information to be passed down to *StorageHandler implementation - * put implementation specific storage handler configurations here - * @return the implementation specific job properties - */ - public Properties getProperties() { - return properties; - } - - /** - * Serialize this object, compressing the partitions which can exceed the - * allowed jobConf size. - * @see HCATALOG-453 - */ - private void writeObject(ObjectOutputStream oos) - throws IOException { - oos.defaultWriteObject(); - Deflater def = new Deflater(Deflater.BEST_COMPRESSION); - ObjectOutputStream partInfoWriter = - new ObjectOutputStream(new DeflaterOutputStream(oos, def)); - partInfoWriter.writeObject(partitions); - partInfoWriter.close(); - } - - /** - * Deserialize this object, decompressing the partitions which can exceed the - * allowed jobConf size. 
- * @see HCATALOG-453 - */ - @SuppressWarnings("unchecked") - private void readObject(ObjectInputStream ois) - throws IOException, ClassNotFoundException { - ois.defaultReadObject(); - ObjectInputStream partInfoReader = - new ObjectInputStream(new InflaterInputStream(ois)); - partitions = (List)partInfoReader.readObject(); - } -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/InternalUtil.java hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/InternalUtil.java deleted file mode 100644 index 4167afa..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/InternalUtil.java +++ /dev/null @@ -1,218 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.mapreduce; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.metastore.MetaStoreUtils; -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.metastore.api.Partition; -import org.apache.hadoop.hive.metastore.api.StorageDescriptor; -import org.apache.hadoop.hive.ql.metadata.Table; -import org.apache.hadoop.hive.serde2.Deserializer; -import org.apache.hadoop.hive.serde2.SerDe; -import org.apache.hadoop.hive.serde2.SerDeException; -import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; -import org.apache.hadoop.mapred.Reporter; -import org.apache.hadoop.mapreduce.InputSplit; -import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.hcatalog.common.HCatUtil; -import org.apache.hcatalog.data.schema.HCatFieldSchema; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Properties; - -class InternalUtil { - private static final Logger LOG = 
LoggerFactory.getLogger(InternalUtil.class); - - static StorerInfo extractStorerInfo(StorageDescriptor sd, Map properties) throws IOException { - Properties hcatProperties = new Properties(); - for (String key : properties.keySet()) { - hcatProperties.put(key, properties.get(key)); - } - - // also populate with StorageDescriptor->SerDe.Parameters - for (Map.Entry param : - sd.getSerdeInfo().getParameters().entrySet()) { - hcatProperties.put(param.getKey(), param.getValue()); - } - - - return new StorerInfo( - sd.getInputFormat(), sd.getOutputFormat(), sd.getSerdeInfo().getSerializationLib(), - properties.get(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE), - hcatProperties); - } - - static StructObjectInspector createStructObjectInspector(HCatSchema outputSchema) throws IOException { - - if (outputSchema == null) { - throw new IOException("Invalid output schema specified"); - } - - List fieldInspectors = new ArrayList(); - List fieldNames = new ArrayList(); - - for (HCatFieldSchema hcatFieldSchema : outputSchema.getFields()) { - TypeInfo type = TypeInfoUtils.getTypeInfoFromTypeString(hcatFieldSchema.getTypeString()); - - fieldNames.add(hcatFieldSchema.getName()); - fieldInspectors.add(getObjectInspector(type)); - } - - StructObjectInspector structInspector = ObjectInspectorFactory. - getStandardStructObjectInspector(fieldNames, fieldInspectors); - return structInspector; - } - - private static ObjectInspector getObjectInspector(TypeInfo type) throws IOException { - - switch (type.getCategory()) { - - case PRIMITIVE: - PrimitiveTypeInfo primitiveType = (PrimitiveTypeInfo) type; - return PrimitiveObjectInspectorFactory. 
- getPrimitiveJavaObjectInspector(primitiveType.getPrimitiveCategory()); - - case MAP: - MapTypeInfo mapType = (MapTypeInfo) type; - MapObjectInspector mapInspector = ObjectInspectorFactory.getStandardMapObjectInspector( - getObjectInspector(mapType.getMapKeyTypeInfo()), getObjectInspector(mapType.getMapValueTypeInfo())); - return mapInspector; - - case LIST: - ListTypeInfo listType = (ListTypeInfo) type; - ListObjectInspector listInspector = ObjectInspectorFactory.getStandardListObjectInspector( - getObjectInspector(listType.getListElementTypeInfo())); - return listInspector; - - case STRUCT: - StructTypeInfo structType = (StructTypeInfo) type; - List fieldTypes = structType.getAllStructFieldTypeInfos(); - - List fieldInspectors = new ArrayList(); - for (TypeInfo fieldType : fieldTypes) { - fieldInspectors.add(getObjectInspector(fieldType)); - } - - StructObjectInspector structInspector = ObjectInspectorFactory.getStandardStructObjectInspector( - structType.getAllStructFieldNames(), fieldInspectors); - return structInspector; - - default: - throw new IOException("Unknown field schema type"); - } - } - - //TODO this has to find a better home, it's also hardcoded as default in hive would be nice - // if the default was decided by the serde - static void initializeOutputSerDe(SerDe serDe, Configuration conf, OutputJobInfo jobInfo) - throws SerDeException { - serDe.initialize(conf, getSerdeProperties(jobInfo.getTableInfo(), jobInfo.getOutputSchema())); - } - - static void initializeDeserializer(Deserializer deserializer, Configuration conf, - HCatTableInfo info, HCatSchema schema) throws SerDeException { - Properties props = getSerdeProperties(info, schema); - LOG.info("Initializing " + deserializer.getClass().getName() + " with properties " + props); - deserializer.initialize(conf, props); - } - - private static Properties getSerdeProperties(HCatTableInfo info, HCatSchema s) - throws SerDeException { - Properties props = new Properties(); - List fields = 
HCatUtil.getFieldSchemaList(s.getFields()); - props.setProperty(org.apache.hadoop.hive.serde.serdeConstants.LIST_COLUMNS, - MetaStoreUtils.getColumnNamesFromFieldSchema(fields)); - props.setProperty(org.apache.hadoop.hive.serde.serdeConstants.LIST_COLUMN_TYPES, - MetaStoreUtils.getColumnTypesFromFieldSchema(fields)); - - // setting these props to match LazySimpleSerde - props.setProperty(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_NULL_FORMAT, "\\N"); - props.setProperty(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_FORMAT, "1"); - - //add props from params set in table schema - props.putAll(info.getStorerInfo().getProperties()); - - return props; - } - - static Reporter createReporter(TaskAttemptContext context) { - return new ProgressReporter(context); - } - - /** - * Casts an InputSplit into a HCatSplit, providing a useful error message if the cast fails. - * @param split the InputSplit - * @return the HCatSplit - * @throws IOException - */ - public static HCatSplit castToHCatSplit(InputSplit split) throws IOException { - if (split instanceof HCatSplit) { - return (HCatSplit) split; - } else { - throw new IOException("Split must be " + HCatSplit.class.getName() - + " but found " + split.getClass().getName()); - } - } - - - static Map createPtnKeyValueMap(Table table, Partition ptn) - throws IOException { - List values = ptn.getValues(); - if (values.size() != table.getPartitionKeys().size()) { - throw new IOException( - "Partition values in partition inconsistent with table definition, table " - + table.getTableName() + " has " - + table.getPartitionKeys().size() - + " partition keys, partition has " + values.size() - + "partition values"); - } - - Map ptnKeyValues = new HashMap(); - - int i = 0; - for (FieldSchema schema : table.getPartitionKeys()) { - // CONCERN : the way this mapping goes, the order *needs* to be - // preserved for table.getPartitionKeys() and ptn.getValues() - ptnKeyValues.put(schema.getName().toLowerCase(), 
values.get(i)); - i++; - } - - return ptnKeyValues; - } -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/MultiOutputFormat.java hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/MultiOutputFormat.java deleted file mode 100644 index 3458a25..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/MultiOutputFormat.java +++ /dev/null @@ -1,623 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.mapreduce; - -import java.io.DataInput; -import java.io.DataOutput; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.LinkedHashMap; -import java.util.LinkedHashSet; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Set; - -import org.apache.commons.lang.StringUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.shims.HadoopShims; -import org.apache.hadoop.hive.shims.ShimLoader; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.JobContext; -import org.apache.hadoop.mapreduce.JobStatus.State; -import org.apache.hadoop.mapreduce.OutputCommitter; -import org.apache.hadoop.mapreduce.OutputFormat; -import org.apache.hadoop.mapreduce.RecordWriter; -import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.hadoop.mapreduce.TaskInputOutputContext; -import org.apache.hadoop.util.ReflectionUtils; -import org.apache.hcatalog.common.HCatUtil; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * The MultiOutputFormat class simplifies writing output data to multiple - * outputs. - *

- * Multiple output formats can be defined each with its own - * OutputFormat class, own key class and own value class. Any - * configuration on these output format classes can be done without interfering - * with other output format's configuration. - *

- * Usage pattern for job submission: - * - *

- *
- * Job job = new Job();
- *
- * FileInputFormat.setInputPath(job, inDir);
- *
- * job.setMapperClass(WordCountMap.class);
- * job.setReducerClass(WordCountReduce.class);
- * job.setInputFormatClass(TextInputFormat.class);
- * job.setOutputFormatClass(MultiOutputFormat.class);
- * // Need not define OutputKeyClass and OutputValueClass. They default to
- * // Writable.class
- * job.setMapOutputKeyClass(Text.class);
- * job.setMapOutputValueClass(IntWritable.class);
- *
- *
- * // Create a JobConfigurer that will configure the job with the multiple
- * // output format information.
- * JobConfigurer configurer = MultiOutputFormat.createConfigurer(job);
- *
- * // Defines additional single text based output 'text' for the job.
- * // Any configuration for the defined OutputFormat should be done with
- * // the Job obtained with configurer.getJob() method.
- * configurer.addOutputFormat("text", TextOutputFormat.class,
- *                 IntWritable.class, Text.class);
- * FileOutputFormat.setOutputPath(configurer.getJob("text"), textOutDir);
- *
- * // Defines additional sequence-file based output 'sequence' for the job
- * configurer.addOutputFormat("sequence", SequenceFileOutputFormat.class,
- *                 Text.class, IntWritable.class);
- * FileOutputFormat.setOutputPath(configurer.getJob("sequence"), seqOutDir);
- * ...
- * // configure method to be called on the JobConfigurer once all the
- * // output formats have been defined and configured.
- * configurer.configure();
- *
- * job.waitForCompletion(true);
- * ...
- * 
- *

- * Usage in Reducer: - * - *

- * public class WordCountReduce extends
- *         Reducer<Text, IntWritable, Writable, Writable> {
- *
- *     private IntWritable count = new IntWritable();
- *
- *     public void reduce(Text word, Iterator<IntWritable> values,
- *             Context context)
- *             throws IOException {
- *         int sum = 0;
- *         for (IntWritable val : values) {
- *             sum += val.get();
- *         }
- *         count.set(sum);
- *         MultiOutputFormat.write("text", count, word, context);
- *         MultiOutputFormat.write("sequence", word, count, context);
- *     }
- *
- * }
- *
- * 
- * - * Map only jobs: - *

- * MultiOutputFormat.write("output", key, value, context); can be called similar - * to a reducer in map only jobs. - * - */ -public class MultiOutputFormat extends OutputFormat { - - private static final Logger LOGGER = LoggerFactory.getLogger(MultiOutputFormat.class.getName()); - private static final String MO_ALIASES = "mapreduce.multiout.aliases"; - private static final String MO_ALIAS = "mapreduce.multiout.alias"; - private static final String CONF_KEY_DELIM = "%%"; - private static final String CONF_VALUE_DELIM = ";;"; - private static final String COMMA_DELIM = ","; - private static final List configsToOverride = new ArrayList(); - private static final Map configsToMerge = new HashMap(); - - static { - configsToOverride.add("mapred.output.dir"); - configsToOverride.add(ShimLoader.getHadoopShims().getHCatShim().getPropertyName( - HadoopShims.HCatHadoopShims.PropertyName.CACHE_SYMLINK)); - configsToMerge.put(JobContext.JOB_NAMENODES, COMMA_DELIM); - configsToMerge.put("tmpfiles", COMMA_DELIM); - configsToMerge.put("tmpjars", COMMA_DELIM); - configsToMerge.put("tmparchives", COMMA_DELIM); - configsToMerge.put(ShimLoader.getHadoopShims().getHCatShim().getPropertyName( - HadoopShims.HCatHadoopShims.PropertyName.CACHE_ARCHIVES), COMMA_DELIM); - configsToMerge.put(ShimLoader.getHadoopShims().getHCatShim().getPropertyName( - HadoopShims.HCatHadoopShims.PropertyName.CACHE_FILES), COMMA_DELIM); - String fileSep; - if (HCatUtil.isHadoop23()) { - fileSep = ","; - } else { - fileSep = System.getProperty("path.separator"); - } - configsToMerge.put("mapred.job.classpath.archives", fileSep); - configsToMerge.put("mapred.job.classpath.files", fileSep); - } - - /** - * Get a JobConfigurer instance that will support configuration of the job - * for multiple output formats. 
- * - * @param job the mapreduce job to be submitted - * @return JobConfigurer - */ - public static JobConfigurer createConfigurer(Job job) { - return JobConfigurer.create(job); - } - - /** - * Get the JobContext with the related OutputFormat configuration populated given the alias - * and the actual JobContext - * @param alias the name given to the OutputFormat configuration - * @param context the JobContext - * @return a copy of the JobContext with the alias configuration populated - */ - public static JobContext getJobContext(String alias, JobContext context) { - String aliasConf = context.getConfiguration().get(getAliasConfName(alias)); - JobContext aliasContext = ShimLoader.getHadoopShims().getHCatShim().createJobContext( - context.getConfiguration(), context.getJobID()); - addToConfig(aliasConf, aliasContext.getConfiguration()); - return aliasContext; - } - - /** - * Get the TaskAttemptContext with the related OutputFormat configuration populated given the alias - * and the actual TaskAttemptContext - * @param alias the name given to the OutputFormat configuration - * @param context the Mapper or Reducer Context - * @return a copy of the TaskAttemptContext with the alias configuration populated - */ - public static TaskAttemptContext getTaskAttemptContext(String alias, TaskAttemptContext context) { - String aliasConf = context.getConfiguration().get(getAliasConfName(alias)); - TaskAttemptContext aliasContext = ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext( - context.getConfiguration(), context.getTaskAttemptID()); - addToConfig(aliasConf, aliasContext.getConfiguration()); - return aliasContext; - } - - /** - * Write the output key and value using the OutputFormat defined by the - * alias. 
- * - * @param alias the name given to the OutputFormat configuration - * @param key the output key to be written - * @param value the output value to be written - * @param context the Mapper or Reducer Context - * @throws IOException - * @throws InterruptedException - */ - public static void write(String alias, K key, V value, TaskInputOutputContext context) - throws IOException, InterruptedException { - KeyValue keyval = new KeyValue(key, value); - context.write(new Text(alias), keyval); - } - - @Override - public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException { - for (String alias : getOutputFormatAliases(context)) { - LOGGER.debug("Calling checkOutputSpecs for alias: " + alias); - JobContext aliasContext = getJobContext(alias, context); - OutputFormat outputFormat = getOutputFormatInstance(aliasContext); - outputFormat.checkOutputSpecs(aliasContext); - // Copy credentials and any new config added back to JobContext - context.getCredentials().addAll(aliasContext.getCredentials()); - setAliasConf(alias, context, aliasContext); - } - } - - @Override - public RecordWriter getRecordWriter(TaskAttemptContext context) - throws IOException, - InterruptedException { - return new MultiRecordWriter(context); - } - - @Override - public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException, - InterruptedException { - return new MultiOutputCommitter(context); - } - - private static OutputFormat getOutputFormatInstance(JobContext context) { - OutputFormat outputFormat; - try { - outputFormat = ReflectionUtils.newInstance(context.getOutputFormatClass(), - context.getConfiguration()); - } catch (ClassNotFoundException e) { - throw new IllegalStateException(e); - } - return outputFormat; - } - - private static String[] getOutputFormatAliases(JobContext context) { - return context.getConfiguration().getStrings(MO_ALIASES); - } - - /** - * Compare the aliasContext with userJob and add the differing configuration - * 
as mapreduce.multiout.alias..conf to the userJob. - *

- * Merge config like tmpjars, tmpfile, tmparchives, - * mapreduce.job.hdfs-servers that are directly handled by JobClient and add - * them to userJob. - *

- * Add mapred.output.dir config to userJob. - * - * @param alias alias name associated with a OutputFormat - * @param userJob reference to Job that the user is going to submit - * @param aliasContext JobContext populated with OutputFormat related - * configuration. - */ - private static void setAliasConf(String alias, JobContext userJob, JobContext aliasContext) { - Configuration userConf = userJob.getConfiguration(); - StringBuilder builder = new StringBuilder(); - for (Entry conf : aliasContext.getConfiguration()) { - String key = conf.getKey(); - String value = conf.getValue(); - String jobValue = userConf.getRaw(key); - if (jobValue == null || !jobValue.equals(value)) { - if (configsToMerge.containsKey(key)) { - String mergedValue = getMergedConfValue(jobValue, value, configsToMerge.get(key)); - userConf.set(key, mergedValue); - } else { - if (configsToOverride.contains(key)) { - userConf.set(key, value); - } - builder.append(key).append(CONF_KEY_DELIM).append(value) - .append(CONF_VALUE_DELIM); - } - } - } - if (builder.length() > CONF_VALUE_DELIM.length()) { - builder.delete(builder.length() - CONF_VALUE_DELIM.length(), builder.length()); - userConf.set(getAliasConfName(alias), builder.toString()); - } - } - - private static String getMergedConfValue(String originalValues, String newValues, String separator) { - if (originalValues == null) { - return newValues; - } - Set mergedValues = new LinkedHashSet(); - mergedValues.addAll(Arrays.asList(StringUtils.split(originalValues, separator))); - mergedValues.addAll(Arrays.asList(StringUtils.split(newValues, separator))); - StringBuilder builder = new StringBuilder(originalValues.length() + newValues.length() + 2); - for (String value : mergedValues) { - builder.append(value).append(separator); - } - return builder.substring(0, builder.length() - separator.length()); - } - - private static String getAliasConfName(String alias) { - return MO_ALIAS + "." 
+ alias + ".conf"; - } - - private static void addToConfig(String aliasConf, Configuration conf) { - String[] config = aliasConf.split(CONF_KEY_DELIM + "|" + CONF_VALUE_DELIM); - for (int i = 0; i < config.length; i += 2) { - conf.set(config[i], config[i + 1]); - } - } - - /** - * Class that supports configuration of the job for multiple output formats. - */ - public static class JobConfigurer { - - private final Job job; - private Map outputConfigs = new LinkedHashMap(); - - private JobConfigurer(Job job) { - this.job = job; - } - - private static JobConfigurer create(Job job) { - JobConfigurer configurer = new JobConfigurer(job); - return configurer; - } - - /** - * Add a OutputFormat configuration to the Job with a alias name. - * - * @param alias the name to be given to the OutputFormat configuration - * @param outputFormatClass OutputFormat class - * @param keyClass the key class for the output data - * @param valueClass the value class for the output data - * @throws IOException - */ - public void addOutputFormat(String alias, - Class outputFormatClass, - Class keyClass, Class valueClass) throws IOException { - Job copy = new Job(this.job.getConfiguration()); - outputConfigs.put(alias, copy); - copy.setOutputFormatClass(outputFormatClass); - copy.setOutputKeyClass(keyClass); - copy.setOutputValueClass(valueClass); - } - - /** - * Get the Job configuration for a OutputFormat defined by the alias - * name. The job returned by this method should be passed to the - * OutputFormat for any configuration instead of the Job that will be - * submitted to the JobClient. - * - * @param alias the name used for the OutputFormat during - * addOutputFormat - * @return Job - */ - public Job getJob(String alias) { - Job copy = outputConfigs.get(alias); - if (copy == null) { - throw new IllegalArgumentException("OutputFormat with alias " + alias - + " has not beed added"); - } - return copy; - } - - /** - * Configure the job with the multiple output formats added. 
This method - * should be called after all the output formats have been added and - * configured and before the job submission. - */ - public void configure() { - StringBuilder aliases = new StringBuilder(); - Configuration jobConf = job.getConfiguration(); - for (Entry entry : outputConfigs.entrySet()) { - // Copy credentials - job.getCredentials().addAll(entry.getValue().getCredentials()); - String alias = entry.getKey(); - aliases.append(alias).append(COMMA_DELIM); - // Store the differing configuration for each alias in the job - // as a setting. - setAliasConf(alias, job, entry.getValue()); - } - aliases.delete(aliases.length() - COMMA_DELIM.length(), aliases.length()); - jobConf.set(MO_ALIASES, aliases.toString()); - } - - } - - private static class KeyValue implements Writable { - private final K key; - private final V value; - - public KeyValue(K key, V value) { - this.key = key; - this.value = value; - } - - public K getKey() { - return key; - } - - public V getValue() { - return value; - } - - @Override - public void write(DataOutput out) throws IOException { - // Ignore. Not required as this will be never - // serialized/deserialized. - } - - @Override - public void readFields(DataInput in) throws IOException { - // Ignore. Not required as this will be never - // serialized/deserialized. - } - } - - private static class MultiRecordWriter extends RecordWriter { - - private final Map baseRecordWriters; - - public MultiRecordWriter(TaskAttemptContext context) throws IOException, - InterruptedException { - baseRecordWriters = new LinkedHashMap(); - String[] aliases = getOutputFormatAliases(context); - for (String alias : aliases) { - LOGGER.info("Creating record writer for alias: " + alias); - TaskAttemptContext aliasContext = getTaskAttemptContext(alias, context); - Configuration aliasConf = aliasContext.getConfiguration(); - // Create output directory if not already created. 
- String outDir = aliasConf.get("mapred.output.dir"); - if (outDir != null) { - Path outputDir = new Path(outDir); - FileSystem fs = outputDir.getFileSystem(aliasConf); - if (!fs.exists(outputDir)) { - fs.mkdirs(outputDir); - } - } - OutputFormat outputFormat = getOutputFormatInstance(aliasContext); - baseRecordWriters.put(alias, - new BaseRecordWriterContainer(outputFormat.getRecordWriter(aliasContext), - aliasContext)); - } - } - - @Override - public void write(Writable key, Writable value) throws IOException, InterruptedException { - Text _key = (Text) key; - KeyValue _value = (KeyValue) value; - String alias = new String(_key.getBytes(), 0, _key.getLength()); - BaseRecordWriterContainer baseRWContainer = baseRecordWriters.get(alias); - if (baseRWContainer == null) { - throw new IllegalArgumentException("OutputFormat with alias " + alias - + " has not been added"); - } - baseRWContainer.getRecordWriter().write(_value.getKey(), _value.getValue()); - } - - @Override - public void close(TaskAttemptContext context) throws IOException, InterruptedException { - for (Entry entry : baseRecordWriters.entrySet()) { - BaseRecordWriterContainer baseRWContainer = entry.getValue(); - LOGGER.info("Closing record writer for alias: " + entry.getKey()); - baseRWContainer.getRecordWriter().close(baseRWContainer.getContext()); - } - } - - } - - private static class BaseRecordWriterContainer { - - private final RecordWriter recordWriter; - private final TaskAttemptContext context; - - public BaseRecordWriterContainer(RecordWriter recordWriter, TaskAttemptContext context) { - this.recordWriter = recordWriter; - this.context = context; - } - - public RecordWriter getRecordWriter() { - return recordWriter; - } - - public TaskAttemptContext getContext() { - return context; - } - } - - public class MultiOutputCommitter extends OutputCommitter { - - private final Map outputCommitters; - - public MultiOutputCommitter(TaskAttemptContext context) throws IOException, - InterruptedException { 
- outputCommitters = new LinkedHashMap(); - String[] aliases = getOutputFormatAliases(context); - for (String alias : aliases) { - LOGGER.info("Creating output committer for alias: " + alias); - TaskAttemptContext aliasContext = getTaskAttemptContext(alias, context); - OutputCommitter baseCommitter = getOutputFormatInstance(aliasContext) - .getOutputCommitter(aliasContext); - outputCommitters.put(alias, - new BaseOutputCommitterContainer(baseCommitter, aliasContext)); - } - } - - @Override - public void setupJob(JobContext jobContext) throws IOException { - for (String alias : outputCommitters.keySet()) { - LOGGER.info("Calling setupJob for alias: " + alias); - BaseOutputCommitterContainer outputContainer = outputCommitters.get(alias); - outputContainer.getBaseCommitter().setupJob(outputContainer.getContext()); - } - } - - @Override - public void setupTask(TaskAttemptContext taskContext) throws IOException { - for (String alias : outputCommitters.keySet()) { - LOGGER.info("Calling setupTask for alias: " + alias); - BaseOutputCommitterContainer outputContainer = outputCommitters.get(alias); - outputContainer.getBaseCommitter().setupTask(outputContainer.getContext()); - } - } - - @Override - public boolean needsTaskCommit(TaskAttemptContext taskContext) throws IOException { - boolean needTaskCommit = false; - for (String alias : outputCommitters.keySet()) { - BaseOutputCommitterContainer outputContainer = outputCommitters.get(alias); - needTaskCommit = needTaskCommit - || outputContainer.getBaseCommitter().needsTaskCommit( - outputContainer.getContext()); - } - return needTaskCommit; - } - - @Override - public void commitTask(TaskAttemptContext taskContext) throws IOException { - for (String alias : outputCommitters.keySet()) { - BaseOutputCommitterContainer outputContainer = outputCommitters.get(alias); - OutputCommitter baseCommitter = outputContainer.getBaseCommitter(); - TaskAttemptContext committerContext = outputContainer.getContext(); - if 
(baseCommitter.needsTaskCommit(committerContext)) { - LOGGER.info("Calling commitTask for alias: " + alias); - baseCommitter.commitTask(committerContext); - } - } - } - - @Override - public void abortTask(TaskAttemptContext taskContext) throws IOException { - for (String alias : outputCommitters.keySet()) { - LOGGER.info("Calling abortTask for alias: " + alias); - BaseOutputCommitterContainer outputContainer = outputCommitters.get(alias); - outputContainer.getBaseCommitter().abortTask(outputContainer.getContext()); - } - } - - @Override - public void commitJob(JobContext jobContext) throws IOException { - for (String alias : outputCommitters.keySet()) { - LOGGER.info("Calling commitJob for alias: " + alias); - BaseOutputCommitterContainer outputContainer = outputCommitters.get(alias); - outputContainer.getBaseCommitter().commitJob(outputContainer.getContext()); - } - } - - @Override - public void abortJob(JobContext jobContext, State state) throws IOException { - for (String alias : outputCommitters.keySet()) { - LOGGER.info("Calling abortJob for alias: " + alias); - BaseOutputCommitterContainer outputContainer = outputCommitters.get(alias); - outputContainer.getBaseCommitter().abortJob(outputContainer.getContext(), state); - } - } - } - - private static class BaseOutputCommitterContainer { - - private final OutputCommitter outputCommitter; - private final TaskAttemptContext context; - - public BaseOutputCommitterContainer(OutputCommitter outputCommitter, - TaskAttemptContext context) { - this.outputCommitter = outputCommitter; - this.context = context; - } - - public OutputCommitter getBaseCommitter() { - return outputCommitter; - } - - public TaskAttemptContext getContext() { - return context; - } - } - -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/OutputCommitterContainer.java hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/OutputCommitterContainer.java deleted file mode 100644 index 7d7d361..0000000 --- 
hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/OutputCommitterContainer.java +++ /dev/null @@ -1,47 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.mapreduce; - -import org.apache.hadoop.mapreduce.JobContext; -import org.apache.hadoop.mapreduce.OutputCommitter; - -/** - * This class will contain an implementation of an OutputCommitter. - * See {@link OutputFormatContainer} for more information about containers. 
- */ -abstract class OutputCommitterContainer extends OutputCommitter { - private final org.apache.hadoop.mapred.OutputCommitter committer; - - /** - * @param context current JobContext - * @param committer OutputCommitter that this instance will contain - */ - public OutputCommitterContainer(JobContext context, org.apache.hadoop.mapred.OutputCommitter committer) { - this.committer = committer; - } - - /** - * @return underlying OutputCommitter - */ - public OutputCommitter getBaseOutputCommitter() { - return committer; - } - -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/OutputFormatContainer.java hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/OutputFormatContainer.java deleted file mode 100644 index 5346ea0..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/OutputFormatContainer.java +++ /dev/null @@ -1,53 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.mapreduce; - -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.mapreduce.OutputFormat; -import org.apache.hcatalog.data.HCatRecord; - -/** - * This container class is used to wrap OutputFormat implementations and augment them with - * behavior necessary to work with HCatalog (ie metastore updates, hcatalog delegation tokens, etc). - * Containers are also used to provide storage specific implementations of some HCatalog features (ie dynamic partitioning). - * Hence users wishing to create storage specific implementations of HCatalog features should implement this class and override - * HCatStorageHandler.getOutputFormatContainer(OutputFormat outputFormat) to return the implementation. - * By default DefaultOutputFormatContainer is used, which only implements the bare minimum features HCatalog features - * such as partitioning isn't supported. - */ -abstract class OutputFormatContainer extends OutputFormat, HCatRecord> { - private org.apache.hadoop.mapred.OutputFormat, ? super Writable> of; - - /** - * @param of OutputFormat this instance will contain - */ - public OutputFormatContainer(org.apache.hadoop.mapred.OutputFormat, ? super Writable> of) { - this.of = of; - } - - /** - * @return underlying OutputFormat - */ - public org.apache.hadoop.mapred.OutputFormat getBaseOutputFormat() { - return of; - } - -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/OutputJobInfo.java hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/OutputJobInfo.java deleted file mode 100644 index 5066179..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/OutputJobInfo.java +++ /dev/null @@ -1,270 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.mapreduce; - -import java.io.Serializable; -import java.util.Collections; -import java.util.Comparator; -import java.util.List; -import java.util.Map; -import java.util.Properties; - -import org.apache.hadoop.hive.metastore.MetaStoreUtils; -import org.apache.hcatalog.data.schema.HCatSchema; - -/** The class used to serialize and store the output related information */ -public class OutputJobInfo implements Serializable { - - /** The db and table names. */ - private final String databaseName; - private final String tableName; - - /** The serialization version. */ - private static final long serialVersionUID = 1L; - - /** The table info provided by user. */ - private HCatTableInfo tableInfo; - - /** The output schema. This is given to us by user. This wont contain any - * partition columns ,even if user has specified them. 
- * */ - private HCatSchema outputSchema; - - /** The location of the partition being written */ - private String location; - - /** The partition values to publish to, if used for output*/ - private Map partitionValues; - - private List posOfPartCols; - private List posOfDynPartCols; - - private Properties properties; - - private int maxDynamicPartitions; - - /** List of keys for which values were not specified at write setup time, to be infered at write time */ - private List dynamicPartitioningKeys; - - private boolean harRequested; - - /** - * Initializes a new OutputJobInfo instance - * for writing data from a table. - * @param databaseName the db name - * @param tableName the table name - * @param partitionValues The partition values to publish to, can be null or empty Map to - * work with hadoop security, the kerberos principal name of the server - else null - * The principal name should be of the form: - * /_HOST@ like "hcat/_HOST@myrealm.com" - * The special string _HOST will be replaced automatically with the correct host name - * indicate write to a unpartitioned table. For partitioned tables, this map should - * contain keys for all partition columns with corresponding values. - */ - public static OutputJobInfo create(String databaseName, - String tableName, - Map partitionValues) { - return new OutputJobInfo(databaseName, - tableName, - partitionValues); - } - - private OutputJobInfo(String databaseName, - String tableName, - Map partitionValues) { - this.databaseName = (databaseName == null) ? 
MetaStoreUtils.DEFAULT_DATABASE_NAME : databaseName; - this.tableName = tableName; - this.partitionValues = partitionValues; - this.properties = new Properties(); - } - - /** - * @return the posOfPartCols - */ - protected List getPosOfPartCols() { - return posOfPartCols; - } - - /** - * @return the posOfDynPartCols - */ - protected List getPosOfDynPartCols() { - return posOfDynPartCols; - } - - /** - * @param posOfPartCols the posOfPartCols to set - */ - protected void setPosOfPartCols(List posOfPartCols) { - // sorting the list in the descending order so that deletes happen back-to-front - Collections.sort(posOfPartCols, new Comparator() { - @Override - public int compare(Integer earlier, Integer later) { - return (earlier > later) ? -1 : ((earlier == later) ? 0 : 1); - } - }); - this.posOfPartCols = posOfPartCols; - } - - /** - * @param posOfDynPartCols the posOfDynPartCols to set - */ - protected void setPosOfDynPartCols(List posOfDynPartCols) { - // Important - no sorting here! We retain order, it's used to match with values at runtime - this.posOfDynPartCols = posOfDynPartCols; - } - - /** - * @return the tableInfo - */ - public HCatTableInfo getTableInfo() { - return tableInfo; - } - - /** - * @return the outputSchema - */ - public HCatSchema getOutputSchema() { - return outputSchema; - } - - /** - * @param schema the outputSchema to set - */ - public void setOutputSchema(HCatSchema schema) { - this.outputSchema = schema; - } - - /** - * @return the location - */ - public String getLocation() { - return location; - } - - /** - * @param location location to write to - */ - public void setLocation(String location) { - this.location = location; - } - - /** - * Sets the value of partitionValues - * @param partitionValues the partition values to set - */ - void setPartitionValues(Map partitionValues) { - this.partitionValues = partitionValues; - } - - /** - * Gets the value of partitionValues - * @return the partitionValues - */ - public Map getPartitionValues() { 
- return partitionValues; - } - - /** - * set the tablInfo instance - * this should be the same instance - * determined by this object's DatabaseName and TableName - * @param tableInfo - */ - void setTableInfo(HCatTableInfo tableInfo) { - this.tableInfo = tableInfo; - } - - /** - * @return database name of table to write to - */ - public String getDatabaseName() { - return databaseName; - } - - /** - * @return name of table to write to - */ - public String getTableName() { - return tableName; - } - - /** - * Set/Get Property information to be passed down to *StorageHandler implementation - * put implementation specific storage handler configurations here - * @return the implementation specific job properties - */ - public Properties getProperties() { - return properties; - } - - /** - * Set maximum number of allowable dynamic partitions - * @param maxDynamicPartitions - */ - public void setMaximumDynamicPartitions(int maxDynamicPartitions) { - this.maxDynamicPartitions = maxDynamicPartitions; - } - - /** - * Returns maximum number of allowable dynamic partitions - * @return maximum number of allowable dynamic partitions - */ - public int getMaxDynamicPartitions() { - return this.maxDynamicPartitions; - } - - /** - * Sets whether or not hadoop archiving has been requested for this job - * @param harRequested - */ - public void setHarRequested(boolean harRequested) { - this.harRequested = harRequested; - } - - /** - * Returns whether or not hadoop archiving has been requested for this job - * @return whether or not hadoop archiving has been requested for this job - */ - public boolean getHarRequested() { - return this.harRequested; - } - - /** - * Returns whether or not Dynamic Partitioning is used - * @return whether or not dynamic partitioning is currently enabled and used - */ - public boolean isDynamicPartitioningUsed() { - return !((dynamicPartitioningKeys == null) || (dynamicPartitioningKeys.isEmpty())); - } - - /** - * Sets the list of dynamic partitioning 
keys used for outputting without specifying all the keys - * @param dynamicPartitioningKeys - */ - public void setDynamicPartitioningKeys(List dynamicPartitioningKeys) { - this.dynamicPartitioningKeys = dynamicPartitioningKeys; - } - - public List getDynamicPartitioningKeys() { - return this.dynamicPartitioningKeys; - } - -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/PartInfo.java hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/PartInfo.java deleted file mode 100644 index 0042a0e..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/PartInfo.java +++ /dev/null @@ -1,164 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.mapreduce; - -import java.io.Serializable; -import java.util.Map; -import java.util.Properties; - -import org.apache.hcatalog.data.schema.HCatSchema; - -/** The Class used to serialize the partition information read from the metadata server that maps to a partition. */ -public class PartInfo implements Serializable { - - /** The serialization version */ - private static final long serialVersionUID = 1L; - - /** The partition schema. 
*/ - private final HCatSchema partitionSchema; - - /** The information about which input storage handler to use */ - private final String storageHandlerClassName; - private final String inputFormatClassName; - private final String outputFormatClassName; - private final String serdeClassName; - - /** HCat-specific properties set at the partition */ - private final Properties hcatProperties; - - /** The data location. */ - private final String location; - - /** The map of partition key names and their values. */ - private Map partitionValues; - - /** Job properties associated with this parition */ - Map jobProperties; - - /** the table info associated with this partition */ - HCatTableInfo tableInfo; - - /** - * Instantiates a new hcat partition info. - * @param partitionSchema the partition schema - * @param storageHandler the storage handler - * @param location the location - * @param hcatProperties hcat-specific properties at the partition - * @param jobProperties the job properties - * @param tableInfo the table information - */ - public PartInfo(HCatSchema partitionSchema, HCatStorageHandler storageHandler, - String location, Properties hcatProperties, - Map jobProperties, HCatTableInfo tableInfo) { - this.partitionSchema = partitionSchema; - this.location = location; - this.hcatProperties = hcatProperties; - this.jobProperties = jobProperties; - this.tableInfo = tableInfo; - - this.storageHandlerClassName = storageHandler.getClass().getName(); - this.inputFormatClassName = storageHandler.getInputFormatClass().getName(); - this.serdeClassName = storageHandler.getSerDeClass().getName(); - this.outputFormatClassName = storageHandler.getOutputFormatClass().getName(); - } - - /** - * Gets the value of partitionSchema. 
- * @return the partitionSchema - */ - public HCatSchema getPartitionSchema() { - return partitionSchema; - } - - /** - * @return the storage handler class name - */ - public String getStorageHandlerClassName() { - return storageHandlerClassName; - } - - /** - * @return the inputFormatClassName - */ - public String getInputFormatClassName() { - return inputFormatClassName; - } - - /** - * @return the outputFormatClassName - */ - public String getOutputFormatClassName() { - return outputFormatClassName; - } - - /** - * @return the serdeClassName - */ - public String getSerdeClassName() { - return serdeClassName; - } - - /** - * Gets the input storage handler properties. - * @return HCat-specific properties set at the partition - */ - public Properties getInputStorageHandlerProperties() { - return hcatProperties; - } - - /** - * Gets the value of location. - * @return the location - */ - public String getLocation() { - return location; - } - - /** - * Sets the partition values. - * @param partitionValues the new partition values - */ - public void setPartitionValues(Map partitionValues) { - this.partitionValues = partitionValues; - } - - /** - * Gets the partition values. - * @return the partition values - */ - public Map getPartitionValues() { - return partitionValues; - } - - /** - * Gets the job properties. - * @return a map of the job properties - */ - public Map getJobProperties() { - return jobProperties; - } - - /** - * Gets the HCatalog table information. 
- * @return the table information - */ - public HCatTableInfo getTableInfo() { - return tableInfo; - } -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/ProgressReporter.java hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/ProgressReporter.java deleted file mode 100644 index b900135..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/ProgressReporter.java +++ /dev/null @@ -1,92 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.mapreduce; - -import org.apache.hadoop.mapred.Counters; -import org.apache.hadoop.mapred.InputSplit; -import org.apache.hadoop.mapred.Reporter; -import org.apache.hadoop.mapreduce.StatusReporter; -import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.hadoop.mapreduce.TaskInputOutputContext; - -class ProgressReporter extends StatusReporter implements Reporter { - - private TaskInputOutputContext context = null; - private TaskAttemptContext taskAttemptContext = null; - - public ProgressReporter(TaskAttemptContext context) { - if (context instanceof TaskInputOutputContext) { - this.context = (TaskInputOutputContext) context; - } else { - taskAttemptContext = context; - } - } - - @Override - public void setStatus(String status) { - if (context != null) { - context.setStatus(status); - } - } - - @Override - public Counters.Counter getCounter(Enum name) { - return (context != null) ? (Counters.Counter) context.getCounter(name) : null; - } - - @Override - public Counters.Counter getCounter(String group, String name) { - return (context != null) ? (Counters.Counter) context.getCounter(group, name) : null; - } - - @Override - public void incrCounter(Enum key, long amount) { - if (context != null) { - context.getCounter(key).increment(amount); - } - } - - @Override - public void incrCounter(String group, String counter, long amount) { - if (context != null) { - context.getCounter(group, counter).increment(amount); - } - } - - @Override - public InputSplit getInputSplit() throws UnsupportedOperationException { - return null; - } - - public float getProgress() { - /* Required to build against 0.23 Reporter and StatusReporter. */ - /* TODO: determine the progress. 
*/ - return 0.0f; - } - - @Override - public void progress() { - if (context != null) { - context.progress(); - } else { - taskAttemptContext.progress(); - } - } -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/RecordWriterContainer.java hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/RecordWriterContainer.java deleted file mode 100644 index 49d76b2..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/RecordWriterContainer.java +++ /dev/null @@ -1,53 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.mapreduce; - - -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.mapreduce.RecordWriter; -import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.hcatalog.data.HCatRecord; - -/** - * This class will contain an implementation of an RecordWriter. - * See {@link OutputFormatContainer} for more information about containers. - */ -abstract class RecordWriterContainer extends RecordWriter, HCatRecord> { - - private final org.apache.hadoop.mapred.RecordWriter, ? 
super Writable> baseRecordWriter; - - /** - * @param context current JobContext - * @param baseRecordWriter RecordWriter that this instance will contain - */ - public RecordWriterContainer(TaskAttemptContext context, - org.apache.hadoop.mapred.RecordWriter, ? super Writable> baseRecordWriter) { - this.baseRecordWriter = baseRecordWriter; - } - - /** - * @return underlying RecordWriter - */ - public org.apache.hadoop.mapred.RecordWriter getBaseRecordWriter() { - return baseRecordWriter; - } - -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/Security.java hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/Security.java deleted file mode 100644 index 3b216a7..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/Security.java +++ /dev/null @@ -1,191 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.mapreduce; - -import java.io.IOException; -import java.lang.reflect.InvocationTargetException; -import java.lang.reflect.Method; -import java.util.Map; -import java.util.Map.Entry; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.shims.ShimLoader; -import org.apache.hadoop.hive.thrift.DelegationTokenSelector; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.JobContext; -import org.apache.hadoop.security.Credentials; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hadoop.security.token.Token; -import org.apache.hadoop.security.token.TokenIdentifier; -import org.apache.hadoop.security.token.TokenSelector; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.common.HCatUtil; -import org.apache.thrift.TException; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -final class Security { - - private static final Logger LOG = LoggerFactory.getLogger(HCatOutputFormat.class); - - // making sure this is not initialized unless needed - private static final class LazyHolder { - public static final Security INSTANCE = new Security(); - } - - public static Security getInstance() { - return LazyHolder.INSTANCE; - } - - boolean isSecurityEnabled() { - try { - Method m = UserGroupInformation.class.getMethod("isSecurityEnabled"); - return (Boolean) m.invoke(null, (Object[]) null); - } catch (NoSuchMethodException e) { - LOG.info("Security is not supported by this version of hadoop.", e); - } catch (InvocationTargetException e) { - String msg = "Failed to call isSecurityEnabled()"; - LOG.info(msg, e); - throw new IllegalStateException(msg, e); - } catch (IllegalAccessException e) { - String msg = "Failed to call isSecurityEnabled()"; - LOG.info(msg, e); - throw new 
IllegalStateException(msg, e); - } - return false; - } - - // a signature string to associate with a HCatTableInfo - essentially - // a concatenation of dbname, tablename and partition keyvalues. - String getTokenSignature(OutputJobInfo outputJobInfo) { - StringBuilder result = new StringBuilder(""); - String dbName = outputJobInfo.getDatabaseName(); - if (dbName != null) { - result.append(dbName); - } - String tableName = outputJobInfo.getTableName(); - if (tableName != null) { - result.append("." + tableName); - } - Map partValues = outputJobInfo.getPartitionValues(); - if (partValues != null) { - for (Entry entry : partValues.entrySet()) { - result.append("/"); - result.append(entry.getKey()); - result.append("="); - result.append(entry.getValue()); - } - - } - return result.toString(); - } - - void handleSecurity( - Credentials credentials, - OutputJobInfo outputJobInfo, - HiveMetaStoreClient client, - Configuration conf, - boolean harRequested) - throws IOException, MetaException, TException, Exception { - if (UserGroupInformation.isSecurityEnabled()) { - UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); - // check if oozie has set up a hcat deleg. token - if so use it - TokenSelector hiveTokenSelector = new DelegationTokenSelector(); - //Oozie does not change the service field of the token - //hence by default token generation will have a value of "new Text("")" - //HiveClient will look for a use TokenSelector.selectToken() with service - //set to empty "Text" if hive.metastore.token.signature property is set to null - Token hiveToken = hiveTokenSelector.selectToken( - new Text(), ugi.getTokens()); - if (hiveToken == null) { - // we did not get token set up by oozie, let's get them ourselves here. 
- // we essentially get a token per unique Output HCatTableInfo - this is - // done because through Pig, setOutput() method is called multiple times - // We want to only get the token once per unique output HCatTableInfo - - // we cannot just get one token since in multi-query case (> 1 store in 1 job) - // or the case when a single pig script results in > 1 jobs, the single - // token will get cancelled by the output committer and the subsequent - // stores will fail - by tying the token with the concatenation of - // dbname, tablename and partition keyvalues of the output - // TableInfo, we can have as many tokens as there are stores and the TokenSelector - // will correctly pick the right tokens which the committer will use and - // cancel. - String tokenSignature = getTokenSignature(outputJobInfo); - // get delegation tokens from hcat server and store them into the "job" - // These will be used in to publish partitions to - // hcat normally in OutputCommitter.commitJob() - // when the JobTracker in Hadoop MapReduce starts supporting renewal of - // arbitrary tokens, the renewer should be the principal of the JobTracker - hiveToken = HCatUtil.extractThriftToken(client.getDelegationToken(ugi.getUserName()), tokenSignature); - - if (harRequested) { - TokenSelector jtTokenSelector = - new org.apache.hadoop.mapreduce.security.token.delegation.DelegationTokenSelector(); - Token jtToken = jtTokenSelector.selectToken(org.apache.hadoop.security.SecurityUtil.buildTokenService( - ShimLoader.getHadoopShims().getHCatShim().getResourceManagerAddress(conf)), ugi.getTokens()); - if (jtToken == null) { - //we don't need to cancel this token as the TokenRenewer for JT tokens - //takes care of cancelling them - credentials.addToken( - new Text("hcat jt token"), - HCatUtil.getJobTrackerDelegationToken(conf, ugi.getUserName()) - ); - } - } - - credentials.addToken(new Text(ugi.getUserName() + "_" + tokenSignature), hiveToken); - // this will be used by the outputcommitter to pass 
on to the metastore client - // which in turn will pass on to the TokenSelector so that it can select - // the right token. - conf.set(HCatConstants.HCAT_KEY_TOKEN_SIGNATURE, tokenSignature); - } - } - } - - void handleSecurity( - Job job, - OutputJobInfo outputJobInfo, - HiveMetaStoreClient client, - Configuration conf, - boolean harRequested) - throws IOException, MetaException, TException, Exception { - handleSecurity(job.getCredentials(), outputJobInfo, client, conf, harRequested); - } - - // we should cancel hcat token if it was acquired by hcat - // and not if it was supplied (ie Oozie). In the latter - // case the HCAT_KEY_TOKEN_SIGNATURE property in the conf will not be set - void cancelToken(HiveMetaStoreClient client, JobContext context) throws IOException, MetaException { - String tokenStrForm = client.getTokenStrForm(); - if (tokenStrForm != null && context.getConfiguration().get(HCatConstants.HCAT_KEY_TOKEN_SIGNATURE) != null) { - try { - client.cancelDelegationToken(tokenStrForm); - } catch (TException e) { - String msg = "Failed to cancel delegation token"; - LOG.error(msg, e); - throw new IOException(msg, e); - } - } - } - -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/StorerInfo.java hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/StorerInfo.java deleted file mode 100644 index e718a4c..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/mapreduce/StorerInfo.java +++ /dev/null @@ -1,109 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.mapreduce; - -import java.io.Serializable; -import java.util.Properties; - -/** Information about the storer to use for writing the data. */ -public class StorerInfo implements Serializable { - - /** The serialization version */ - private static final long serialVersionUID = 1L; - - /** The properties for the storage handler */ - private Properties properties; - - private String ofClass; - - private String ifClass; - - private String serdeClass; - - private String storageHandlerClass; - - /** - * Initialize the storer information. - * @param ifClass the input format class - * @param ofClass the output format class - * @param serdeClass the SerDe class - * @param storageHandlerClass the storage handler class - * @param properties the properties for the storage handler - */ - public StorerInfo(String ifClass, String ofClass, String serdeClass, String storageHandlerClass, Properties properties) { - super(); - this.ifClass = ifClass; - this.ofClass = ofClass; - this.serdeClass = serdeClass; - this.storageHandlerClass = storageHandlerClass; - this.properties = properties; - } - - /** - * @return the input format class - */ - public String getIfClass() { - return ifClass; - } - - /** - * @param ifClass the input format class - */ - public void setIfClass(String ifClass) { - this.ifClass = ifClass; - } - - /** - * @return the output format class - */ - public String getOfClass() { - return ofClass; - } - - /** - * @return the serdeClass - */ - public String getSerdeClass() { - return serdeClass; - } - - /** - * @return 
the storageHandlerClass - */ - public String getStorageHandlerClass() { - return storageHandlerClass; - } - - /** - * @return the storer properties - */ - public Properties getProperties() { - return properties; - } - - /** - * @param properties the storer properties to set - */ - public void setProperties(Properties properties) { - this.properties = properties; - } - - -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/oozie/JavaAction.java hcatalog/core/src/main/java/org/apache/hcatalog/oozie/JavaAction.java deleted file mode 100644 index 6de3114..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/oozie/JavaAction.java +++ /dev/null @@ -1,41 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.hcatalog.oozie; - -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.cli.CliDriver; -import org.apache.hadoop.hive.cli.CliSessionState; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzer; - -public class JavaAction { - - public static void main(String[] args) throws Exception { - - HiveConf conf = new HiveConf(); - conf.addResource(new Path("file:///", System.getProperty("oozie.action.conf.xml"))); - conf.setVar(ConfVars.SEMANTIC_ANALYZER_HOOK, HCatSemanticAnalyzer.class.getName()); - conf.setBoolVar(ConfVars.METASTORE_USE_THRIFT_SASL, true); - SessionState.start(new CliSessionState(conf)); - new CliDriver().processLine(args[0]); - } - -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/rcfile/RCFileMapReduceInputFormat.java hcatalog/core/src/main/java/org/apache/hcatalog/rcfile/RCFileMapReduceInputFormat.java deleted file mode 100644 index 386cc7b..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/rcfile/RCFileMapReduceInputFormat.java +++ /dev/null @@ -1,50 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.rcfile; - -import java.io.IOException; -import java.util.List; - -import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable; -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.io.SequenceFile; -import org.apache.hadoop.mapreduce.InputSplit; -import org.apache.hadoop.mapreduce.JobContext; -import org.apache.hadoop.mapreduce.RecordReader; -import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; - -public class RCFileMapReduceInputFormat - extends FileInputFormat { - - @Override - public RecordReader createRecordReader(InputSplit split, - TaskAttemptContext context) throws IOException, InterruptedException { - - context.setStatus(split.toString()); - return new RCFileMapReduceRecordReader(); - } - - @Override - public List getSplits(JobContext job) throws IOException { - - job.getConfiguration().setLong("mapred.min.split.size", SequenceFile.SYNC_INTERVAL); - return super.getSplits(job); - } -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/rcfile/RCFileMapReduceOutputFormat.java hcatalog/core/src/main/java/org/apache/hcatalog/rcfile/RCFileMapReduceOutputFormat.java deleted file mode 100644 index 3fd3c81..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/rcfile/RCFileMapReduceOutputFormat.java +++ /dev/null @@ -1,105 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.rcfile; - -import java.io.IOException; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.ql.io.RCFile; -import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable; -import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.io.compress.CompressionCodec; -import org.apache.hadoop.io.compress.DefaultCodec; -import org.apache.hadoop.mapreduce.RecordWriter; -import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter; -import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; -import org.apache.hadoop.util.ReflectionUtils; - -/** - * The RC file input format using new Hadoop mapreduce APIs. - */ -public class RCFileMapReduceOutputFormat extends - FileOutputFormat, BytesRefArrayWritable> { - - /** - * Set number of columns into the given configuration. 
- * @param conf - * configuration instance which need to set the column number - * @param columnNum - * column number for RCFile's Writer - * - */ - public static void setColumnNumber(Configuration conf, int columnNum) { - assert columnNum > 0; - conf.setInt(RCFile.COLUMN_NUMBER_CONF_STR, columnNum); - } - - /* (non-Javadoc) - * @see org.apache.hadoop.mapreduce.lib.output.FileOutputFormat#getRecordWriter(org.apache.hadoop.mapreduce.TaskAttemptContext) - */ - @Override - public org.apache.hadoop.mapreduce.RecordWriter, BytesRefArrayWritable> getRecordWriter( - TaskAttemptContext task) throws IOException, InterruptedException { - - //FileOutputFormat.getWorkOutputPath takes TaskInputOutputContext instead of - //TaskAttemptContext, so can't use that here - FileOutputCommitter committer = (FileOutputCommitter) getOutputCommitter(task); - Path outputPath = committer.getWorkPath(); - - FileSystem fs = outputPath.getFileSystem(task.getConfiguration()); - - if (!fs.exists(outputPath)) { - fs.mkdirs(outputPath); - } - - Path file = getDefaultWorkFile(task, ""); - - CompressionCodec codec = null; - if (getCompressOutput(task)) { - Class codecClass = getOutputCompressorClass(task, DefaultCodec.class); - codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, task.getConfiguration()); - } - - final RCFile.Writer out = new RCFile.Writer(fs, task.getConfiguration(), file, task, codec); - - return new RecordWriter, BytesRefArrayWritable>() { - - /* (non-Javadoc) - * @see org.apache.hadoop.mapreduce.RecordWriter#write(java.lang.Object, java.lang.Object) - */ - @Override - public void write(WritableComparable key, BytesRefArrayWritable value) - throws IOException { - out.append(value); - } - - /* (non-Javadoc) - * @see org.apache.hadoop.mapreduce.RecordWriter#close(org.apache.hadoop.mapreduce.TaskAttemptContext) - */ - @Override - public void close(TaskAttemptContext task) throws IOException, InterruptedException { - out.close(); - } - }; - } - -} diff --git 
hcatalog/core/src/main/java/org/apache/hcatalog/rcfile/RCFileMapReduceRecordReader.java hcatalog/core/src/main/java/org/apache/hcatalog/rcfile/RCFileMapReduceRecordReader.java deleted file mode 100644 index 77d3031..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/rcfile/RCFileMapReduceRecordReader.java +++ /dev/null @@ -1,121 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.hcatalog.rcfile; - -import java.io.IOException; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.ql.io.RCFile; -import org.apache.hadoop.hive.ql.io.RCFile.Reader; -import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable; -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.mapreduce.InputSplit; -import org.apache.hadoop.mapreduce.RecordReader; -import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.hadoop.mapreduce.lib.input.FileSplit; - -public class RCFileMapReduceRecordReader - extends RecordReader { - - private Reader in; - private long start; - private long end; - private boolean more = true; - - // key and value objects are created once in initialize() and then reused - // for every getCurrentKey() and getCurrentValue() call. This is important - // since RCFile makes an assumption of this fact. - - private LongWritable key; - private BytesRefArrayWritable value; - - @Override - public void close() throws IOException { - in.close(); - } - - @Override - public LongWritable getCurrentKey() throws IOException, InterruptedException { - return key; - } - - @Override - public BytesRefArrayWritable getCurrentValue() throws IOException, InterruptedException { - return value; - } - - @Override - public float getProgress() throws IOException, InterruptedException { - if (end == start) { - return 0.0f; - } else { - return Math.min(1.0f, (in.getPosition() - start) / (float) (end - start)); - } - } - - @Override - public boolean nextKeyValue() throws IOException, InterruptedException { - - more = next(key); - if (more) { - in.getCurrentRow(value); - } - - return more; - } - - private boolean next(LongWritable key) throws IOException { - if (!more) { - return false; - } - - more = in.next(key); - if (!more) { - return false; - } - - if (in.lastSeenSyncPos() >= end) { - more = false; - return more; - } - return more; - } - - @Override - 
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, - InterruptedException { - - FileSplit fSplit = (FileSplit) split; - Path path = fSplit.getPath(); - Configuration conf = context.getConfiguration(); - this.in = new RCFile.Reader(path.getFileSystem(conf), path, conf); - this.end = fSplit.getStart() + fSplit.getLength(); - - if (fSplit.getStart() > in.getPosition()) { - in.sync(fSplit.getStart()); - } - - this.start = in.getPosition(); - more = start < end; - - key = new LongWritable(); - value = new BytesRefArrayWritable(); - } -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/security/HdfsAuthorizationProvider.java hcatalog/core/src/main/java/org/apache/hcatalog/security/HdfsAuthorizationProvider.java deleted file mode 100644 index ef0db3f..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/security/HdfsAuthorizationProvider.java +++ /dev/null @@ -1,337 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.security; - -import static org.apache.hadoop.hive.metastore.MetaStoreUtils.DEFAULT_DATABASE_NAME; - -import java.io.FileNotFoundException; -import java.io.IOException; -import java.util.EnumSet; -import java.util.List; - -import javax.security.auth.login.LoginException; - -import org.apache.commons.lang.ArrayUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.permission.FsAction; -import org.apache.hadoop.fs.permission.FsPermission; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.Warehouse; -import org.apache.hadoop.hive.metastore.api.Database; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.ql.metadata.AuthorizationException; -import org.apache.hadoop.hive.ql.metadata.Hive; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.metadata.Partition; -import org.apache.hadoop.hive.ql.metadata.Table; -import org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider; -import org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProviderBase; -import org.apache.hadoop.hive.ql.security.authorization.Privilege; -import org.apache.hadoop.hive.shims.HadoopShims; -import org.apache.hadoop.hive.shims.ShimLoader; -import org.apache.hadoop.security.AccessControlException; -import org.apache.hadoop.security.UserGroupInformation; - -/** - * An AuthorizationProvider, which checks against the data access level permissions on HDFS. - * It makes sense to eventually move this class to Hive, so that all hive users can - * use this authorization model. 
- */ -public class HdfsAuthorizationProvider extends HiveAuthorizationProviderBase { - - protected Warehouse wh; - - //Config variables : create an enum to store them if we have more - private static final String PROXY_USER_NAME = "proxy.user.name"; - - public HdfsAuthorizationProvider() { - super(); - } - - public HdfsAuthorizationProvider(Configuration conf) { - super(); - setConf(conf); - } - - @Override - public void init(Configuration conf) throws HiveException { - hive_db = new HiveProxy(Hive.get(new HiveConf(conf, HiveAuthorizationProvider.class))); - } - - @Override - public void setConf(Configuration conf) { - super.setConf(conf); - try { - this.wh = new Warehouse(conf); - } catch (MetaException ex) { - throw new RuntimeException(ex); - } - } - - protected FsAction getFsAction(Privilege priv, Path path) { - - switch (priv.getPriv()) { - case ALL: - throw new AuthorizationException("no matching Action for Privilege.All"); - case ALTER_DATA: - return FsAction.WRITE; - case ALTER_METADATA: - return FsAction.WRITE; - case CREATE: - return FsAction.WRITE; - case DROP: - return FsAction.WRITE; - case INDEX: - return FsAction.WRITE; - case LOCK: - return FsAction.WRITE; - case SELECT: - return FsAction.READ; - case SHOW_DATABASE: - return FsAction.READ; - case UNKNOWN: - default: - throw new AuthorizationException("Unknown privilege"); - } - } - - protected EnumSet getFsActions(Privilege[] privs, Path path) { - EnumSet actions = EnumSet.noneOf(FsAction.class); - - if (privs == null) { - return actions; - } - - for (Privilege priv : privs) { - actions.add(getFsAction(priv, path)); - } - - return actions; - } - - private static final String DATABASE_WAREHOUSE_SUFFIX = ".db"; - - private Path getDefaultDatabasePath(String dbName) throws MetaException { - if (dbName.equalsIgnoreCase(DEFAULT_DATABASE_NAME)) { - return wh.getWhRoot(); - } - return new Path(wh.getWhRoot(), dbName.toLowerCase() + DATABASE_WAREHOUSE_SUFFIX); - } - - protected Path getDbLocation(Database 
db) throws HiveException { - try { - String location = db.getLocationUri(); - if (location == null) { - return getDefaultDatabasePath(db.getName()); - } else { - return wh.getDnsPath(wh.getDatabasePath(db)); - } - } catch (MetaException ex) { - throw new HiveException(ex.getMessage()); - } - } - - @Override - public void authorize(Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) - throws HiveException, AuthorizationException { - //Authorize for global level permissions at the warehouse dir - Path root; - try { - root = wh.getWhRoot(); - authorize(root, readRequiredPriv, writeRequiredPriv); - } catch (MetaException ex) { - throw new HiveException(ex); - } - } - - @Override - public void authorize(Database db, Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) - throws HiveException, AuthorizationException { - if (db == null) { - return; - } - - Path path = getDbLocation(db); - - authorize(path, readRequiredPriv, writeRequiredPriv); - } - - @Override - public void authorize(Table table, Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) - throws HiveException, AuthorizationException { - if (table == null) { - return; - } - - //unlike Hive's model, this can be called at CREATE TABLE as well, since we should authorize - //against the table's declared location - Path path = null; - try { - if (table.getTTable().getSd().getLocation() == null - || table.getTTable().getSd().getLocation().isEmpty()) { - path = wh.getTablePath(hive_db.getDatabase(table.getDbName()), table.getTableName()); - } else { - path = table.getPath(); - } - } catch (MetaException ex) { - throw new HiveException(ex); - } - - authorize(path, readRequiredPriv, writeRequiredPriv); - } - - //TODO: HiveAuthorizationProvider should expose this interface instead of #authorize(Partition, Privilege[], Privilege[]) - public void authorize(Table table, Partition part, Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) - throws HiveException, AuthorizationException { - - 
if (part == null || part.getLocation() == null) { - authorize(table, readRequiredPriv, writeRequiredPriv); - } else { - authorize(part.getPartitionPath(), readRequiredPriv, writeRequiredPriv); - } - } - - @Override - public void authorize(Partition part, Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) - throws HiveException, AuthorizationException { - if (part == null) { - return; - } - authorize(part.getTable(), part, readRequiredPriv, writeRequiredPriv); - } - - @Override - public void authorize(Table table, Partition part, List columns, - Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) throws HiveException, - AuthorizationException { - //columns cannot live in different files, just check for partition level permissions - authorize(table, part, readRequiredPriv, writeRequiredPriv); - } - - /** - * Authorization privileges against a path. - * @param path a filesystem path - * @param readRequiredPriv a list of privileges needed for inputs. - * @param writeRequiredPriv a list of privileges needed for outputs. - */ - public void authorize(Path path, Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) - throws HiveException, AuthorizationException { - try { - EnumSet actions = getFsActions(readRequiredPriv, path); - actions.addAll(getFsActions(writeRequiredPriv, path)); - if (actions.isEmpty()) { - return; - } - - checkPermissions(getConf(), path, actions); - - } catch (AccessControlException ex) { - throw new AuthorizationException(ex); - } catch (LoginException ex) { - throw new AuthorizationException(ex); - } catch (IOException ex) { - throw new HiveException(ex); - } - } - - /** - * Checks the permissions for the given path and current user on Hadoop FS. If the given path - * does not exists, it checks for it's parent folder. 
- */ - protected static void checkPermissions(final Configuration conf, final Path path, - final EnumSet actions) throws IOException, LoginException { - - if (path == null) { - throw new IllegalArgumentException("path is null"); - } - - HadoopShims shims = ShimLoader.getHadoopShims(); - final UserGroupInformation ugi; - if (conf.get(PROXY_USER_NAME) != null) { - ugi = UserGroupInformation.createRemoteUser(conf.get(PROXY_USER_NAME)); - } else { - ugi = shims.getUGIForConf(conf); - } - final String user = shims.getShortUserName(ugi); - - final FileSystem fs = path.getFileSystem(conf); - - if (fs.exists(path)) { - checkPermissions(fs, path, actions, user, ugi.getGroupNames()); - } else if (path.getParent() != null) { - // find the ancestor which exists to check it's permissions - Path par = path.getParent(); - while (par != null) { - if (fs.exists(par)) { - break; - } - par = par.getParent(); - } - - checkPermissions(fs, par, actions, user, ugi.getGroupNames()); - } - } - - /** - * Checks the permissions for the given path and current user on Hadoop FS. If the given path - * does not exists, it returns. - */ - @SuppressWarnings("deprecation") - protected static void checkPermissions(final FileSystem fs, final Path path, - final EnumSet actions, String user, String[] groups) throws IOException, - AccessControlException { - - final FileStatus stat; - - try { - stat = fs.getFileStatus(path); - } catch (FileNotFoundException fnfe) { - // File named by path doesn't exist; nothing to validate. - return; - } catch (org.apache.hadoop.fs.permission.AccessControlException ace) { - // Older hadoop version will throw this @deprecated Exception. 
- throw new AccessControlException(ace.getMessage()); - } - - final FsPermission dirPerms = stat.getPermission(); - final String grp = stat.getGroup(); - - for (FsAction action : actions) { - if (user.equals(stat.getOwner())) { - if (dirPerms.getUserAction().implies(action)) { - continue; - } - } - if (ArrayUtils.contains(groups, grp)) { - if (dirPerms.getGroupAction().implies(action)) { - continue; - } - } - if (dirPerms.getOtherAction().implies(action)) { - continue; - } - throw new AccessControlException("action " + action + " not permitted on path " - + path + " for user " + user); - } - } -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/security/StorageDelegationAuthorizationProvider.java hcatalog/core/src/main/java/org/apache/hcatalog/security/StorageDelegationAuthorizationProvider.java deleted file mode 100644 index a6e069f..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/security/StorageDelegationAuthorizationProvider.java +++ /dev/null @@ -1,142 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.security; - -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.api.Database; -import org.apache.hadoop.hive.ql.metadata.AuthorizationException; -import org.apache.hadoop.hive.ql.metadata.Hive; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler; -import org.apache.hadoop.hive.ql.metadata.Partition; -import org.apache.hadoop.hive.ql.metadata.Table; -import org.apache.hadoop.hive.ql.security.HiveAuthenticationProvider; -import org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider; -import org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProviderBase; -import org.apache.hadoop.hive.ql.security.authorization.Privilege; -import org.apache.hadoop.util.ReflectionUtils; -import org.apache.hcatalog.mapreduce.HCatStorageHandler; - -/** - * A HiveAuthorizationProvider which delegates the authorization requests to - * the underlying AuthorizationProviders obtained from the StorageHandler. 
- */ -public class StorageDelegationAuthorizationProvider extends HiveAuthorizationProviderBase { - - protected HiveAuthorizationProvider hdfsAuthorizer = new HdfsAuthorizationProvider(); - - protected static Map authProviders = new HashMap(); - - @Override - public void setConf(Configuration conf) { - super.setConf(conf); - hdfsAuthorizer.setConf(conf); - } - - @Override - public void init(Configuration conf) throws HiveException { - hive_db = new HiveProxy(Hive.get(new HiveConf(conf, HiveAuthorizationProvider.class))); - } - - @Override - public void setAuthenticator(HiveAuthenticationProvider authenticator) { - super.setAuthenticator(authenticator); - hdfsAuthorizer.setAuthenticator(authenticator); - } - - static { - registerAuthProvider("org.apache.hadoop.hive.hbase.HBaseStorageHandler", - "org.apache.hcatalog.hbase.HBaseAuthorizationProvider"); - registerAuthProvider("org.apache.hcatalog.hbase.HBaseHCatStorageHandler", - "org.apache.hcatalog.hbase.HBaseAuthorizationProvider"); - } - - //workaround until Hive adds StorageHandler.getAuthorizationProvider(). 
Remove these parts afterwards - public static void registerAuthProvider(String storageHandlerClass, - String authProviderClass) { - authProviders.put(storageHandlerClass, authProviderClass); - } - - /** Returns the StorageHandler of the Table obtained from the HCatStorageHandler */ - protected HiveAuthorizationProvider getDelegate(Table table) throws HiveException { - HiveStorageHandler handler = table.getStorageHandler(); - - if (handler != null) { - if (handler instanceof HCatStorageHandler) { - return ((HCatStorageHandler) handler).getAuthorizationProvider(); - } else { - String authProviderClass = authProviders.get(handler.getClass().getCanonicalName()); - - if (authProviderClass != null) { - try { - ReflectionUtils.newInstance(getConf().getClassByName(authProviderClass), getConf()); - } catch (ClassNotFoundException ex) { - throw new HiveException("Cannot instantiate delegation AuthotizationProvider"); - } - } - - //else we do not have anything to delegate to - throw new HiveException(String.format("Storage Handler for table:%s is not an instance " + - "of HCatStorageHandler", table.getTableName())); - } - } else { - //return an authorizer for HDFS - return hdfsAuthorizer; - } - } - - @Override - public void authorize(Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) - throws HiveException, AuthorizationException { - //global authorizations against warehouse hdfs directory - hdfsAuthorizer.authorize(readRequiredPriv, writeRequiredPriv); - } - - @Override - public void authorize(Database db, Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) - throws HiveException, AuthorizationException { - //db's are tied to a hdfs location - hdfsAuthorizer.authorize(db, readRequiredPriv, writeRequiredPriv); - } - - @Override - public void authorize(Table table, Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) - throws HiveException, AuthorizationException { - getDelegate(table).authorize(table, readRequiredPriv, writeRequiredPriv); - } - - 
@Override - public void authorize(Partition part, Privilege[] readRequiredPriv, - Privilege[] writeRequiredPriv) throws HiveException, AuthorizationException { - getDelegate(part.getTable()).authorize(part, readRequiredPriv, writeRequiredPriv); - } - - @Override - public void authorize(Table table, Partition part, List columns, - Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) throws HiveException, - AuthorizationException { - getDelegate(table).authorize(table, part, columns, readRequiredPriv, writeRequiredPriv); - } -} diff --git hcatalog/core/src/main/java/org/apache/hcatalog/storagehandler/DummyHCatAuthProvider.java hcatalog/core/src/main/java/org/apache/hcatalog/storagehandler/DummyHCatAuthProvider.java deleted file mode 100644 index 6e2b559..0000000 --- hcatalog/core/src/main/java/org/apache/hcatalog/storagehandler/DummyHCatAuthProvider.java +++ /dev/null @@ -1,144 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.storagehandler; - -import java.util.List; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.metastore.api.Database; -import org.apache.hadoop.hive.ql.metadata.AuthorizationException; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.metadata.Partition; -import org.apache.hadoop.hive.ql.metadata.Table; -import org.apache.hadoop.hive.ql.security.HiveAuthenticationProvider; -import org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider; -import org.apache.hadoop.hive.ql.security.authorization.Privilege; - -/** - * This class is a dummy implementation of HiveAuthorizationProvider to provide - * dummy authorization functionality for other classes to extend and override. - */ -class DummyHCatAuthProvider implements HiveAuthorizationProvider { - - @Override - public Configuration getConf() { - return null; - } - - @Override - public void setConf(Configuration conf) { - } - - /* - * (non-Javadoc) - * - * @see - * org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider - * #init(org.apache.hadoop.conf.Configuration) - */ - @Override - public void init(Configuration conf) throws HiveException { - } - - @Override - public HiveAuthenticationProvider getAuthenticator() { - return null; - } - - @Override - public void setAuthenticator(HiveAuthenticationProvider authenticator) { - } - - /* - * (non-Javadoc) - * - * @see - * org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider - * #authorize(org.apache.hadoop.hive.ql.security.authorization.Privilege[], - * org.apache.hadoop.hive.ql.security.authorization.Privilege[]) - */ - @Override - public void authorize(Privilege[] readRequiredPriv, - Privilege[] writeRequiredPriv) throws HiveException, - AuthorizationException { - } - - /* - * (non-Javadoc) - * - * @see - * org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider - * 
#authorize(org.apache.hadoop.hive.metastore.api.Database, - * org.apache.hadoop.hive.ql.security.authorization.Privilege[], - * org.apache.hadoop.hive.ql.security.authorization.Privilege[]) - */ - @Override - public void authorize(Database db, Privilege[] readRequiredPriv, - Privilege[] writeRequiredPriv) throws HiveException, - AuthorizationException { - } - - /* - * (non-Javadoc) - * - * @see - * org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider - * #authorize(org.apache.hadoop.hive.ql.metadata.Table, - * org.apache.hadoop.hive.ql.security.authorization.Privilege[], - * org.apache.hadoop.hive.ql.security.authorization.Privilege[]) - */ - @Override - public void authorize(Table table, Privilege[] readRequiredPriv, - Privilege[] writeRequiredPriv) throws HiveException, - AuthorizationException { - } - - /* - * (non-Javadoc) - * - * @see - * org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider - * #authorize(org.apache.hadoop.hive.ql.metadata.Partition, - * org.apache.hadoop.hive.ql.security.authorization.Privilege[], - * org.apache.hadoop.hive.ql.security.authorization.Privilege[]) - */ - @Override - public void authorize(Partition part, Privilege[] readRequiredPriv, - Privilege[] writeRequiredPriv) throws HiveException, - AuthorizationException { - } - - /* - * (non-Javadoc) - * - * @see - * org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider - * #authorize(org.apache.hadoop.hive.ql.metadata.Table, - * org.apache.hadoop.hive.ql.metadata.Partition, java.util.List, - * org.apache.hadoop.hive.ql.security.authorization.Privilege[], - * org.apache.hadoop.hive.ql.security.authorization.Privilege[]) - */ - @Override - public void authorize(Table table, Partition part, List columns, - Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) - throws HiveException, AuthorizationException { - } - -} diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/cli/HCatCli.java 
hcatalog/core/src/main/java/org/apache/hive/hcatalog/cli/HCatCli.java
new file mode 100644
index 0000000..12eec18
--- /dev/null
+++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/cli/HCatCli.java
@@ -0,0 +1,362 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hive.hcatalog.cli;
+
+import java.io.BufferedReader;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.PrintStream;
+import java.io.PrintWriter;
+import java.io.UnsupportedEncodingException;
+import java.util.ArrayList;
+import java.util.Properties;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.GnuParser;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.Option;
+import org.apache.commons.cli.OptionBuilder;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.apache.commons.cli.Parser;
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.hive.cli.CliSessionState;
+import org.apache.hadoop.hive.common.LogUtils;
+import org.apache.hadoop.hive.common.LogUtils.LogInitializationException;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
+import org.apache.hadoop.hive.ql.CommandNeedRetryException;
+import org.apache.hadoop.hive.ql.processors.DfsProcessor;
+import org.apache.hadoop.hive.ql.processors.SetProcessor;
+import org.apache.hadoop.hive.ql.session.SessionState;
+import org.apache.hive.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzer;
+import org.apache.hive.hcatalog.common.HCatConstants;
+import org.apache.hive.hcatalog.common.HCatUtil;
+
+/**
+ * Command-line entry point for HCatalog: parses the hcat options, prepares a
+ * Hive CLI session and runs the given command string or script file through
+ * {@link HCatDriver}.
+ */
+public class HCatCli {
+
+  /**
+   * Parses the command line and runs the requested command(s). Every path
+   * through this method ends in a {@code System.exit} call.
+   *
+   * @param args raw command-line arguments
+   */
+  @SuppressWarnings("static-access")
+  public static void main(String[] args) {
+
+    try {
+      LogUtils.initHiveLog4j();
+    } catch (LogInitializationException e) {
+      // Deliberately ignored: the CLI keeps going when log4j setup fails.
+    }
+
+    CliSessionState ss = new CliSessionState(new HiveConf(SessionState.class));
+    ss.in = System.in;
+    try {
+      ss.out = new PrintStream(System.out, true, "UTF-8");
+      ss.err = new PrintStream(System.err, true, "UTF-8");
+    } catch (UnsupportedEncodingException e) {
+      System.exit(1);
+    }
+
+    HiveConf conf = ss.getConf();
+
+    // Configure HCatSemanticAnalyzer as the semantic-analyzer hook.
+    HiveConf.setVar(conf, ConfVars.SEMANTIC_ANALYZER_HOOK, HCatSemanticAnalyzer.class.getName());
+
+    SessionState.start(ss);
+
+    Options options = new Options();
+
+    // -e 'quoted-query-string'
+    options.addOption(OptionBuilder
+      .hasArg()
+      .withArgName("exec")
+      .withDescription("hcat command given from command line")
+      .create('e'));
+
+    // -f <file>
+    options.addOption(OptionBuilder
+      .hasArg()
+      .withArgName("file")
+      .withDescription("hcat commands in file")
+      .create('f'));
+
+    // -g <group>
+    options.addOption(OptionBuilder
+      .hasArg()
+      .withArgName("group")
+      .withDescription("group for the db/table specified in CREATE statement")
+      .create('g'));
+
+    // -p <perms>
+    options.addOption(OptionBuilder
+      .hasArg()
+      .withArgName("perms")
+      .withDescription("permissions for the db/table specified in CREATE statement")
+      .create('p'));
+
+    // -D <property=value>
+    options.addOption(OptionBuilder
+      .hasArgs(2)
+      .withArgName("property=value")
+      .withValueSeparator()
+      .withDescription("use hadoop value for given property")
+      .create('D'));
+
+    // [-h|--help]
+    options.addOption(new Option("h", "help", false, "Print help information"));
+
+    Parser parser = new GnuParser();
+    CommandLine cmdLine = null;
+
+    try {
+      cmdLine = parser.parse(options, args);
+    } catch (ParseException e) {
+      printUsage(options, ss.err);
+      System.exit(1);
+    }
+
+    // -e
+    String execString = cmdLine.getOptionValue('e');
+    // -f
+    String fileName = cmdLine.getOptionValue('f');
+    // -h
+    if (cmdLine.hasOption('h')) {
+      printUsage(options, ss.out);
+      System.exit(0);
+    }
+
+    if (execString != null && fileName != null) {
+      ss.err.println("The '-e' and '-f' options cannot be specified simultaneously");
+      printUsage(options, ss.err);
+      System.exit(1);
+    }
+
+    // -p
+    String perms = cmdLine.getOptionValue('p');
+    if (perms != null) {
+      validatePermissions(ss, conf, perms);
+    }
+
+    // -g
+    String grp = cmdLine.getOptionValue('g');
+    if (grp != null) {
+      conf.set(HCatConstants.HCAT_GROUP, grp);
+    }
+
+    // -D
+    setConfProperties(conf, cmdLine.getOptionProperties("D"));
+
+    if (execString != null) {
+      System.exit(processLine(execString));
+    }
+
+    try {
+      if (fileName != null) {
+        System.exit(processFile(fileName));
+      }
+    } catch (FileNotFoundException e) {
+      ss.err.println("Input file not found. (" + e.getMessage() + ")");
+      System.exit(1);
+    } catch (IOException e) {
+      ss.err.println("Could not open input file for reading. (" + e.getMessage() + ")");
+      System.exit(1);
+    }
+
+    // Neither -e nor -f was given: show the usage and fail.
+    printUsage(options, ss.err);
+    System.exit(1);
+  }
+
+  /** Copies every entry of {@code props} into {@code conf}. */
+  private static void setConfProperties(HiveConf conf, Properties props) {
+    for (java.util.Map.Entry<Object, Object> e : props.entrySet()) {
+      conf.set((String) e.getKey(), (String) e.getValue());
+    }
+  }
+
+  /**
+   * Splits {@code line} on ';' and runs each resulting command in order. A
+   * fragment ending in a backslash has that backslash replaced by ';' and is
+   * joined with the following fragment, so "\;" yields a literal semicolon.
+   *
+   * @return the status code of the last command run, or 0 if none ran
+   */
+  private static int processLine(String line) {
+    int ret = 0;
+
+    String command = "";
+    for (String oneCmd : line.split(";")) {
+
+      if (StringUtils.endsWith(oneCmd, "\\")) {
+        command += StringUtils.chop(oneCmd) + ";";
+        continue;
+      } else {
+        command += oneCmd;
+      }
+      if (StringUtils.isBlank(command)) {
+        continue;
+      }
+
+      ret = processCmd(command);
+      command = "";
+    }
+    return ret;
+  }
+
+  /**
+   * Reads the whole file {@code fileName} and runs its contents through
+   * {@link #processLine(String)}.
+   *
+   * @throws IOException if the file cannot be opened or read
+   */
+  private static int processFile(String fileName) throws IOException {
+    BufferedReader reader = new BufferedReader(new FileReader(fileName));
+    try {
+      StringBuilder qsb = new StringBuilder();
+      String line;
+      while ((line = reader.readLine()) != null) {
+        qsb.append(line).append('\n');
+      }
+      return processLine(qsb.toString());
+    } finally {
+      // Closing the BufferedReader also closes the FileReader it wraps.
+      reader.close();
+    }
+  }
+
+  /**
+   * Runs one command: "set" and "dfs" go to their Hive processors, anything
+   * else goes to {@link HCatDriver}, whose results are printed to the session
+   * output. A non-zero driver response code terminates the JVM with that code.
+   *
+   * @return the resulting status code
+   */
+  private static int processCmd(String cmd) {
+
+    SessionState ss = SessionState.get();
+    long start = System.currentTimeMillis();
+
+    cmd = cmd.trim();
+    String firstToken = cmd.split("\\s+")[0].trim();
+
+    if (firstToken.equalsIgnoreCase("set")) {
+      return new SetProcessor().run(cmd.substring(firstToken.length()).trim()).getResponseCode();
+    } else if (firstToken.equalsIgnoreCase("dfs")) {
+      return new DfsProcessor(ss.getConf()).run(cmd.substring(firstToken.length()).trim()).getResponseCode();
+    }
+
+    HCatDriver driver = new HCatDriver();
+
+    int ret = driver.run(cmd).getResponseCode();
+
+    if (ret != 0) {
+      driver.close();
+      System.exit(ret);
+    }
+
+    ArrayList<String> res = new ArrayList<String>();
+    try {
+      while (driver.getResults(res)) {
+        for (String r : res) {
+          ss.out.println(r);
+        }
+        res.clear();
+      }
+    } catch (IOException e) {
+      ss.err.println("Failed with exception " + e.getClass().getName() + ":"
+        + e.getMessage() + "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
+      ret = 1;
+    } catch (CommandNeedRetryException e) {
+      ss.err.println("Failed with exception " + e.getClass().getName() + ":"
+        + e.getMessage() + "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
+      ret = 1;
+    }
+
+    int cret = driver.close();
+    if (ret == 0) {
+      ret = cret;
+    }
+
+    long end = System.currentTimeMillis();
+    if (end > start) {
+      double timeTaken = (end - start) / 1000.0;
+      ss.err.println("Time taken: " + timeTaken + " seconds");
+    }
+    return ret;
+  }
+
+  /** Writes the usage synopsis and the option descriptions to {@code os}. */
+  private static void printUsage(Options options, OutputStream os) {
+    PrintWriter pw = new PrintWriter(os);
+    new HelpFormatter().printHelp(pw, 2 * HelpFormatter.DEFAULT_WIDTH,
+      "hcat { -e \"<query>\" | -f \"<filepath>\" } [ -g \"<group>\" ] [ -p \"<perms>\" ] [ -D\"<name>=<value>\" ]",
+      null, options, HelpFormatter.DEFAULT_LEFT_PAD, HelpFormatter.DEFAULT_DESC_PAD,
+      null, false);
+    pw.flush();
+  }
+
+  /**
+   * Validates the {@code -p} argument, given either as nine symbolic
+   * characters (e.g. rwxr-x---) or as three octal digits (e.g. 750), and
+   * stores it in {@code conf} under {@link HCatConstants#HCAT_PERMS}.
+   * Prints a message to {@code ss.err} and exits with status 1 when the
+   * value is malformed or rejected by the {@link HCatUtil} checks.
+   */
+  private static void validatePermissions(CliSessionState ss, HiveConf conf, String perms) {
+    perms = perms.trim();
+    FsPermission fp = null;
+
+    // A comma inside a character class is a literal, so list only r, w, x and '-'.
+    if (perms.matches("^\\s*([rwx-]{9})\\s*$")) {
+      fp = FsPermission.valueOf("d" + perms);
+    } else if (perms.matches("^\\s*([0-7]{3})\\s*$")) {
+      fp = new FsPermission(Short.decode("0" + perms));
+    } else {
+      ss.err.println("Invalid permission specification: " + perms);
+      System.exit(1);
+    }
+
+    if (!HCatUtil.validateMorePermissive(fp.getUserAction(), fp.getGroupAction())) {
+      ss.err.println("Invalid permission specification: " + perms + " : user permissions must be more permissive than group permission ");
+      System.exit(1);
+    }
+    if (!HCatUtil.validateMorePermissive(fp.getGroupAction(), fp.getOtherAction())) {
+      ss.err.println("Invalid permission specification: " + perms + " : group permissions must be more permissive than other permission ");
+      System.exit(1);
+    }
+    if ((!HCatUtil.validateExecuteBitPresentIfReadOrWrite(fp.getUserAction())) ||
+      (!HCatUtil.validateExecuteBitPresentIfReadOrWrite(fp.getGroupAction())) ||
+      (!HCatUtil.validateExecuteBitPresentIfReadOrWrite(fp.getOtherAction()))) {
+      ss.err.println("Invalid permission specification: " + perms + " : permissions must have execute permissions if read or write permissions are specified ");
+      System.exit(1);
+    }
+
+    conf.set(HCatConstants.HCAT_PERMS, "d" + fp.toString());
+  }
+}
diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/cli/HCatDriver.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/cli/HCatDriver.java
new file mode 100644
index 0000000..c98ed37
--- /dev/null
+++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/cli/HCatDriver.java
@@ -0,0 +1,143 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.cli; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.hive.metastore.MetaStoreUtils; +import org.apache.hadoop.hive.metastore.Warehouse; +import org.apache.hadoop.hive.ql.CommandNeedRetryException; +import org.apache.hadoop.hive.ql.Driver; +import org.apache.hadoop.hive.ql.metadata.Hive; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; +import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.hive.hcatalog.common.HCatConstants; + +public class HCatDriver extends Driver { + + @Override + public CommandProcessorResponse run(String command) { + + CommandProcessorResponse cpr = null; + try { + cpr = super.run(command); + } catch (CommandNeedRetryException e) { + return new CommandProcessorResponse(-1, e.toString(), ""); + } + + SessionState ss = SessionState.get(); + + if (cpr.getResponseCode() == 0) { + // Only attempt to do this, if cmd was successful. 
+ int rc = setFSPermsNGrp(ss); + cpr = new CommandProcessorResponse(rc); + } + // reset conf vars + ss.getConf().set(HCatConstants.HCAT_CREATE_DB_NAME, ""); + ss.getConf().set(HCatConstants.HCAT_CREATE_TBL_NAME, ""); + + return cpr; + } + + private int setFSPermsNGrp(SessionState ss) { + + Configuration conf = ss.getConf(); + + String tblName = conf.get(HCatConstants.HCAT_CREATE_TBL_NAME, ""); + if (tblName.isEmpty()) { + tblName = conf.get("import.destination.table", ""); + conf.set("import.destination.table", ""); + } + String dbName = conf.get(HCatConstants.HCAT_CREATE_DB_NAME, ""); + String grp = conf.get(HCatConstants.HCAT_GROUP, null); + String permsStr = conf.get(HCatConstants.HCAT_PERMS, null); + + if (tblName.isEmpty() && dbName.isEmpty()) { + // it wasn't create db/table + return 0; + } + + if (null == grp && null == permsStr) { + // there were no grp and perms to begin with. + return 0; + } + + FsPermission perms = FsPermission.valueOf(permsStr); + + if (!tblName.isEmpty()) { + Hive db = null; + try { + db = Hive.get(); + Table tbl = db.getTable(tblName); + Path tblPath = tbl.getPath(); + + FileSystem fs = tblPath.getFileSystem(conf); + if (null != perms) { + fs.setPermission(tblPath, perms); + } + if (null != grp) { + fs.setOwner(tblPath, null, grp); + } + return 0; + + } catch (Exception e) { + ss.err.println(String.format("Failed to set permissions/groups on TABLE: <%s> %s", tblName, e.getMessage())); + try { // We need to drop the table. + if (null != db) { + db.dropTable(tblName); + } + } catch (HiveException he) { + ss.err.println(String.format("Failed to drop TABLE <%s> after failing to set permissions/groups on it. %s", tblName, e.getMessage())); + } + return 1; + } + } else { + // looks like a db operation + if (dbName.isEmpty() || dbName.equals(MetaStoreUtils.DEFAULT_DATABASE_NAME)) { + // We dont set perms or groups for default dir. 
+ return 0; + } else { + try { + Hive db = Hive.get(); + Path dbPath = new Warehouse(conf).getDatabasePath(db.getDatabase(dbName)); + FileSystem fs = dbPath.getFileSystem(conf); + if (perms != null) { + fs.setPermission(dbPath, perms); + } + if (null != grp) { + fs.setOwner(dbPath, null, grp); + } + return 0; + } catch (Exception e) { + ss.err.println(String.format("Failed to set permissions and/or group on DB: <%s> %s", dbName, e.getMessage())); + try { + Hive.get().dropDatabase(dbName); + } catch (Exception e1) { + ss.err.println(String.format("Failed to drop DB <%s> after failing to set permissions/group on it. %s", dbName, e1.getMessage())); + } + return 1; + } + } + } + } +} diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/cli/SemanticAnalysis/CreateDatabaseHook.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/cli/SemanticAnalysis/CreateDatabaseHook.java new file mode 100644 index 0000000..0720f0a --- /dev/null +++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/cli/SemanticAnalysis/CreateDatabaseHook.java @@ -0,0 +1,96 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.hive.hcatalog.cli.SemanticAnalysis; + +import java.io.Serializable; +import java.util.List; + +import org.apache.hadoop.hive.metastore.api.Database; +import org.apache.hadoop.hive.ql.exec.Task; +import org.apache.hadoop.hive.ql.metadata.Hive; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.parse.ASTNode; +import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer; +import org.apache.hadoop.hive.ql.parse.HiveParser; +import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContext; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.CreateDatabaseDesc; +import org.apache.hadoop.hive.ql.plan.DDLWork; +import org.apache.hadoop.hive.ql.security.authorization.Privilege; +import org.apache.hive.hcatalog.common.HCatConstants; + +final class CreateDatabaseHook extends HCatSemanticAnalyzerBase { + + String databaseName; + + @Override + public ASTNode preAnalyze(HiveSemanticAnalyzerHookContext context, ASTNode ast) + throws SemanticException { + + Hive db; + try { + db = context.getHive(); + } catch (HiveException e) { + throw new SemanticException("Couldn't get Hive DB instance in semantic analysis phase.", e); + } + + // Analyze and create tbl properties object + int numCh = ast.getChildCount(); + + databaseName = BaseSemanticAnalyzer.getUnescapedName((ASTNode) ast.getChild(0)); + + for (int num = 1; num < numCh; num++) { + ASTNode child = (ASTNode) ast.getChild(num); + + switch (child.getToken().getType()) { + + case HiveParser.TOK_IFNOTEXISTS: + try { + List dbs = db.getDatabasesByPattern(databaseName); + if (dbs != null && dbs.size() > 0) { // db exists + return ast; + } + } catch (HiveException e) { + throw new SemanticException(e); + } + break; + } + } + + return ast; + } + + @Override + public void postAnalyze(HiveSemanticAnalyzerHookContext context, + List> rootTasks) throws SemanticException { + 
context.getConf().set(HCatConstants.HCAT_CREATE_DB_NAME, databaseName); + super.postAnalyze(context, rootTasks); + } + + @Override + protected void authorizeDDLWork(HiveSemanticAnalyzerHookContext context, + Hive hive, DDLWork work) throws HiveException { + CreateDatabaseDesc createDb = work.getCreateDatabaseDesc(); + if (createDb != null) { + Database db = new Database(createDb.getName(), createDb.getComment(), + createDb.getLocationUri(), createDb.getDatabaseProperties()); + authorize(db, Privilege.CREATE); + } + } +} diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/cli/SemanticAnalysis/CreateTableHook.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/cli/SemanticAnalysis/CreateTableHook.java new file mode 100644 index 0000000..496ace6 --- /dev/null +++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/cli/SemanticAnalysis/CreateTableHook.java @@ -0,0 +1,245 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.hive.hcatalog.cli.SemanticAnalysis; + +import java.io.IOException; +import java.io.Serializable; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.ql.exec.DDLTask; +import org.apache.hadoop.hive.ql.exec.Task; +import org.apache.hadoop.hive.ql.io.RCFileInputFormat; +import org.apache.hadoop.hive.ql.io.RCFileOutputFormat; +import org.apache.hadoop.hive.ql.metadata.Hive; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.parse.ASTNode; +import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer; +import org.apache.hadoop.hive.ql.parse.HiveParser; +import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContext; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.CreateTableDesc; +import org.apache.hadoop.hive.ql.security.authorization.Privilege; +import org.apache.hive.hcatalog.common.HCatConstants; +import org.apache.hive.hcatalog.common.HCatUtil; +import org.apache.hive.hcatalog.mapreduce.HCatStorageHandler; + +final class CreateTableHook extends HCatSemanticAnalyzerBase { + + private String tableName; + + @Override + public ASTNode preAnalyze(HiveSemanticAnalyzerHookContext context, + ASTNode ast) throws SemanticException { + + Hive db; + try { + db = context.getHive(); + } catch (HiveException e) { + throw new SemanticException( + "Couldn't get Hive DB instance in semantic analysis phase.", + e); + } + + // Analyze and create tbl properties object + int numCh = ast.getChildCount(); + + String inputFormat = null, outputFormat = null; + tableName = BaseSemanticAnalyzer.getUnescapedName((ASTNode) ast + .getChild(0)); + boolean likeTable = false; + + for (int num = 
1; num < numCh; num++) { + ASTNode child = (ASTNode) ast.getChild(num); + + switch (child.getToken().getType()) { + + case HiveParser.TOK_QUERY: // CTAS + throw new SemanticException( + "Operation not supported. Create table as " + + "Select is not a valid operation."); + + case HiveParser.TOK_TABLEBUCKETS: + break; + + case HiveParser.TOK_TBLSEQUENCEFILE: + inputFormat = HCatConstants.SEQUENCEFILE_INPUT; + outputFormat = HCatConstants.SEQUENCEFILE_OUTPUT; + break; + + case HiveParser.TOK_TBLTEXTFILE: + inputFormat = org.apache.hadoop.mapred.TextInputFormat.class.getName(); + outputFormat = org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat.class.getName(); + + break; + + case HiveParser.TOK_LIKETABLE: + likeTable = true; + break; + + case HiveParser.TOK_IFNOTEXISTS: + try { + List tables = db.getTablesByPattern(tableName); + if (tables != null && tables.size() > 0) { // table + // exists + return ast; + } + } catch (HiveException e) { + throw new SemanticException(e); + } + break; + + case HiveParser.TOK_TABLEPARTCOLS: + List partCols = BaseSemanticAnalyzer + .getColumns((ASTNode) child.getChild(0), false); + for (FieldSchema fs : partCols) { + if (!fs.getType().equalsIgnoreCase("string")) { + throw new SemanticException( + "Operation not supported. HCatalog only " + + "supports partition columns of type string. " + + "For column: " + + fs.getName() + + " Found type: " + fs.getType()); + } + } + break; + + case HiveParser.TOK_STORAGEHANDLER: + String storageHandler = BaseSemanticAnalyzer + .unescapeSQLString(child.getChild(0).getText()); + if (org.apache.commons.lang.StringUtils + .isNotEmpty(storageHandler)) { + return ast; + } + + break; + + case HiveParser.TOK_TABLEFILEFORMAT: + if (child.getChildCount() < 2) { + throw new SemanticException( + "Incomplete specification of File Format. 
" + + "You must provide InputFormat, OutputFormat."); + } + inputFormat = BaseSemanticAnalyzer.unescapeSQLString(child + .getChild(0).getText()); + outputFormat = BaseSemanticAnalyzer.unescapeSQLString(child + .getChild(1).getText()); + break; + + case HiveParser.TOK_TBLRCFILE: + inputFormat = RCFileInputFormat.class.getName(); + outputFormat = RCFileOutputFormat.class.getName(); + break; + + } + } + + if (!likeTable && (inputFormat == null || outputFormat == null)) { + throw new SemanticException( + "STORED AS specification is either incomplete or incorrect."); + } + + + return ast; + } + + @Override + public void postAnalyze(HiveSemanticAnalyzerHookContext context, + List> rootTasks) + throws SemanticException { + + if (rootTasks.size() == 0) { + // There will be no DDL task created in case if its CREATE TABLE IF + // NOT EXISTS + return; + } + CreateTableDesc desc = ((DDLTask) rootTasks.get(rootTasks.size() - 1)) + .getWork().getCreateTblDesc(); + if (desc == null) { + // Desc will be null if its CREATE TABLE LIKE. Desc will be + // contained in CreateTableLikeDesc. Currently, HCat disallows CTLT in + // pre-hook. So, desc can never be null. + return; + } + Map tblProps = desc.getTblProps(); + if (tblProps == null) { + // tblProps will be null if user didnt use tblprops in his CREATE + // TABLE cmd. + tblProps = new HashMap(); + + } + + // first check if we will allow the user to create table. + String storageHandler = desc.getStorageHandler(); + if (StringUtils.isEmpty(storageHandler)) { + } else { + try { + HCatStorageHandler storageHandlerInst = HCatUtil + .getStorageHandler(context.getConf(), + desc.getStorageHandler(), + desc.getSerName(), + desc.getInputFormat(), + desc.getOutputFormat()); + //Authorization checks are performed by the storageHandler.getAuthorizationProvider(), if + //StorageDelegationAuthorizationProvider is used. 
+ } catch (IOException e) { + throw new SemanticException(e); + } + } + + if (desc != null) { + try { + Table table = context.getHive().newTable(desc.getTableName()); + if (desc.getLocation() != null) { + table.setDataLocation(new Path(desc.getLocation()).toUri()); + } + if (desc.getStorageHandler() != null) { + table.setProperty( + org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE, + desc.getStorageHandler()); + } + for (Map.Entry prop : tblProps.entrySet()) { + table.setProperty(prop.getKey(), prop.getValue()); + } + for (Map.Entry prop : desc.getSerdeProps().entrySet()) { + table.setSerdeParam(prop.getKey(), prop.getValue()); + } + //TODO: set other Table properties as needed + + //authorize against the table operation so that location permissions can be checked if any + + if (HiveConf.getBoolVar(context.getConf(), + HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED)) { + authorize(table, Privilege.CREATE); + } + } catch (HiveException ex) { + throw new SemanticException(ex); + } + } + + desc.setTblProps(tblProps); + context.getConf().set(HCatConstants.HCAT_CREATE_TBL_NAME, tableName); + } +} diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/cli/SemanticAnalysis/HCatSemanticAnalyzer.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/cli/SemanticAnalysis/HCatSemanticAnalyzer.java new file mode 100644 index 0000000..58caed4 --- /dev/null +++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/cli/SemanticAnalysis/HCatSemanticAnalyzer.java @@ -0,0 +1,375 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.cli.SemanticAnalysis; + +import java.io.Serializable; +import java.util.List; + +import org.apache.hadoop.hive.metastore.api.Database; +import org.apache.hadoop.hive.ql.exec.Task; +import org.apache.hadoop.hive.ql.metadata.Hive; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.Partition; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.parse.ASTNode; +import org.apache.hadoop.hive.ql.parse.AbstractSemanticAnalyzerHook; +import org.apache.hadoop.hive.ql.parse.HiveParser; +import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContext; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.AlterTableDesc; +import org.apache.hadoop.hive.ql.plan.DDLWork; +import org.apache.hadoop.hive.ql.plan.DescDatabaseDesc; +import org.apache.hadoop.hive.ql.plan.DescTableDesc; +import org.apache.hadoop.hive.ql.plan.DropDatabaseDesc; +import org.apache.hadoop.hive.ql.plan.DropTableDesc; +import org.apache.hadoop.hive.ql.plan.HiveOperation; +import org.apache.hadoop.hive.ql.plan.PartitionSpec; +import org.apache.hadoop.hive.ql.plan.ShowDatabasesDesc; +import org.apache.hadoop.hive.ql.plan.ShowPartitionsDesc; +import org.apache.hadoop.hive.ql.plan.ShowTableStatusDesc; +import org.apache.hadoop.hive.ql.plan.ShowTablesDesc; +import org.apache.hadoop.hive.ql.plan.SwitchDatabaseDesc; +import org.apache.hadoop.hive.ql.security.authorization.Privilege; +import 
org.apache.hive.hcatalog.common.ErrorType; +import org.apache.hive.hcatalog.common.HCatException; + +public class HCatSemanticAnalyzer extends HCatSemanticAnalyzerBase { + + private AbstractSemanticAnalyzerHook hook; + private ASTNode ast; + + + @Override + public ASTNode preAnalyze(HiveSemanticAnalyzerHookContext context, ASTNode ast) + throws SemanticException { + + this.ast = ast; + switch (ast.getToken().getType()) { + + // HCat wants to intercept following tokens and special-handle them. + case HiveParser.TOK_CREATETABLE: + hook = new CreateTableHook(); + return hook.preAnalyze(context, ast); + + case HiveParser.TOK_CREATEDATABASE: + hook = new CreateDatabaseHook(); + return hook.preAnalyze(context, ast); + + case HiveParser.TOK_ALTERTABLE_PARTITION: + if (((ASTNode) ast.getChild(1)).getToken().getType() == HiveParser.TOK_ALTERTABLE_FILEFORMAT) { + return ast; + } else if (((ASTNode) ast.getChild(1)).getToken().getType() == HiveParser.TOK_ALTERTABLE_ALTERPARTS_MERGEFILES) { + // unsupported + throw new SemanticException("Operation not supported."); + } else { + return ast; + } + + // HCat will allow these operations to be performed. + // Database DDL + case HiveParser.TOK_SHOWDATABASES: + case HiveParser.TOK_DROPDATABASE: + case HiveParser.TOK_SWITCHDATABASE: + case HiveParser.TOK_DESCDATABASE: + case HiveParser.TOK_ALTERDATABASE_PROPERTIES: + + // Index DDL + case HiveParser.TOK_ALTERINDEX_PROPERTIES: + case HiveParser.TOK_CREATEINDEX: + case HiveParser.TOK_DROPINDEX: + case HiveParser.TOK_SHOWINDEXES: + + // View DDL + // "alter view add partition" does not work because of the nature of implementation + // of the DDL in hive. Hive will internally invoke another Driver on the select statement, + // and HCat does not let "select" statement through. I cannot find a way to get around it + // without modifying hive code. So just leave it unsupported. 
+ //case HiveParser.TOK_ALTERVIEW_ADDPARTS: + case HiveParser.TOK_ALTERVIEW_DROPPARTS: + case HiveParser.TOK_ALTERVIEW_PROPERTIES: + case HiveParser.TOK_ALTERVIEW_RENAME: + case HiveParser.TOK_CREATEVIEW: + case HiveParser.TOK_DROPVIEW: + + // Authorization DDL + case HiveParser.TOK_CREATEROLE: + case HiveParser.TOK_DROPROLE: + case HiveParser.TOK_GRANT_ROLE: + case HiveParser.TOK_GRANT_WITH_OPTION: + case HiveParser.TOK_GRANT: + case HiveParser.TOK_REVOKE_ROLE: + case HiveParser.TOK_REVOKE: + case HiveParser.TOK_SHOW_GRANT: + case HiveParser.TOK_SHOW_ROLE_GRANT: + + // Misc DDL + case HiveParser.TOK_LOCKTABLE: + case HiveParser.TOK_UNLOCKTABLE: + case HiveParser.TOK_SHOWLOCKS: + case HiveParser.TOK_DESCFUNCTION: + case HiveParser.TOK_SHOWFUNCTIONS: + case HiveParser.TOK_EXPLAIN: + + // Table DDL + case HiveParser.TOK_ALTERTABLE_ADDPARTS: + case HiveParser.TOK_ALTERTABLE_ADDCOLS: + case HiveParser.TOK_ALTERTABLE_CHANGECOL_AFTER_POSITION: + case HiveParser.TOK_ALTERTABLE_SERDEPROPERTIES: + case HiveParser.TOK_ALTERTABLE_CLUSTER_SORT: + case HiveParser.TOK_ALTERTABLE_DROPPARTS: + case HiveParser.TOK_ALTERTABLE_PROPERTIES: + case HiveParser.TOK_ALTERTABLE_RENAME: + case HiveParser.TOK_ALTERTABLE_RENAMECOL: + case HiveParser.TOK_ALTERTABLE_REPLACECOLS: + case HiveParser.TOK_ALTERTABLE_SERIALIZER: + case HiveParser.TOK_ALTERTABLE_TOUCH: + case HiveParser.TOK_DESCTABLE: + case HiveParser.TOK_DROPTABLE: + case HiveParser.TOK_SHOW_TABLESTATUS: + case HiveParser.TOK_SHOWPARTITIONS: + case HiveParser.TOK_SHOWTABLES: + return ast; + + // In all other cases, throw an exception. Its a white-list of allowed operations. 
+ default: + throw new SemanticException("Operation not supported."); + + } + } + + @Override + public void postAnalyze(HiveSemanticAnalyzerHookContext context, + List> rootTasks) throws SemanticException { + + try { + + switch (ast.getToken().getType()) { + + case HiveParser.TOK_CREATETABLE: + case HiveParser.TOK_CREATEDATABASE: + case HiveParser.TOK_ALTERTABLE_PARTITION: + + // HCat will allow these operations to be performed. + // Database DDL + case HiveParser.TOK_SHOWDATABASES: + case HiveParser.TOK_DROPDATABASE: + case HiveParser.TOK_SWITCHDATABASE: + case HiveParser.TOK_DESCDATABASE: + case HiveParser.TOK_ALTERDATABASE_PROPERTIES: + + // Index DDL + case HiveParser.TOK_ALTERINDEX_PROPERTIES: + case HiveParser.TOK_CREATEINDEX: + case HiveParser.TOK_DROPINDEX: + case HiveParser.TOK_SHOWINDEXES: + + // View DDL + //case HiveParser.TOK_ALTERVIEW_ADDPARTS: + case HiveParser.TOK_ALTERVIEW_DROPPARTS: + case HiveParser.TOK_ALTERVIEW_PROPERTIES: + case HiveParser.TOK_ALTERVIEW_RENAME: + case HiveParser.TOK_CREATEVIEW: + case HiveParser.TOK_DROPVIEW: + + // Authorization DDL + case HiveParser.TOK_CREATEROLE: + case HiveParser.TOK_DROPROLE: + case HiveParser.TOK_GRANT_ROLE: + case HiveParser.TOK_GRANT_WITH_OPTION: + case HiveParser.TOK_GRANT: + case HiveParser.TOK_REVOKE_ROLE: + case HiveParser.TOK_REVOKE: + case HiveParser.TOK_SHOW_GRANT: + case HiveParser.TOK_SHOW_ROLE_GRANT: + + // Misc DDL + case HiveParser.TOK_LOCKTABLE: + case HiveParser.TOK_UNLOCKTABLE: + case HiveParser.TOK_SHOWLOCKS: + case HiveParser.TOK_DESCFUNCTION: + case HiveParser.TOK_SHOWFUNCTIONS: + case HiveParser.TOK_EXPLAIN: + + // Table DDL + case HiveParser.TOK_ALTERTABLE_ADDPARTS: + case HiveParser.TOK_ALTERTABLE_ADDCOLS: + case HiveParser.TOK_ALTERTABLE_CHANGECOL_AFTER_POSITION: + case HiveParser.TOK_ALTERTABLE_SERDEPROPERTIES: + case HiveParser.TOK_ALTERTABLE_CLUSTER_SORT: + case HiveParser.TOK_ALTERTABLE_DROPPARTS: + case HiveParser.TOK_ALTERTABLE_PROPERTIES: + case 
HiveParser.TOK_ALTERTABLE_RENAME: + case HiveParser.TOK_ALTERTABLE_RENAMECOL: + case HiveParser.TOK_ALTERTABLE_REPLACECOLS: + case HiveParser.TOK_ALTERTABLE_SERIALIZER: + case HiveParser.TOK_ALTERTABLE_TOUCH: + case HiveParser.TOK_DESCTABLE: + case HiveParser.TOK_DROPTABLE: + case HiveParser.TOK_SHOW_TABLESTATUS: + case HiveParser.TOK_SHOWPARTITIONS: + case HiveParser.TOK_SHOWTABLES: + break; + + default: + throw new HCatException(ErrorType.ERROR_INTERNAL_EXCEPTION, "Unexpected token: " + ast.getToken()); + } + + authorizeDDL(context, rootTasks); + + } catch (HCatException e) { + throw new SemanticException(e); + } catch (HiveException e) { + throw new SemanticException(e); + } + + if (hook != null) { + hook.postAnalyze(context, rootTasks); + } + } + + private String extractTableName(String compoundName) { + /* + * the table name can potentially be a dot-format one with column names + * specified as part of the table name. e.g. a.b.c where b is a column in + * a and c is a field of the object/column b etc. For authorization + * purposes, we should use only the first part of the dotted name format. + * + */ + + String[] words = compoundName.split("\\."); + return words[0]; + } + + @Override + protected void authorizeDDLWork(HiveSemanticAnalyzerHookContext cntxt, Hive hive, DDLWork work) + throws HiveException { + // DB opereations, none of them are enforced by Hive right now. 
+ + ShowDatabasesDesc showDatabases = work.getShowDatabasesDesc(); + if (showDatabases != null) { + authorize(HiveOperation.SHOWDATABASES.getInputRequiredPrivileges(), + HiveOperation.SHOWDATABASES.getOutputRequiredPrivileges()); + } + + DropDatabaseDesc dropDb = work.getDropDatabaseDesc(); + if (dropDb != null) { + Database db = cntxt.getHive().getDatabase(dropDb.getDatabaseName()); + authorize(db, Privilege.DROP); + } + + DescDatabaseDesc descDb = work.getDescDatabaseDesc(); + if (descDb != null) { + Database db = cntxt.getHive().getDatabase(descDb.getDatabaseName()); + authorize(db, Privilege.SELECT); + } + + SwitchDatabaseDesc switchDb = work.getSwitchDatabaseDesc(); + if (switchDb != null) { + Database db = cntxt.getHive().getDatabase(switchDb.getDatabaseName()); + authorize(db, Privilege.SELECT); + } + + ShowTablesDesc showTables = work.getShowTblsDesc(); + if (showTables != null) { + String dbName = showTables.getDbName() == null ? cntxt.getHive().getCurrentDatabase() + : showTables.getDbName(); + authorize(cntxt.getHive().getDatabase(dbName), Privilege.SELECT); + } + + ShowTableStatusDesc showTableStatus = work.getShowTblStatusDesc(); + if (showTableStatus != null) { + String dbName = showTableStatus.getDbName() == null ? cntxt.getHive().getCurrentDatabase() + : showTableStatus.getDbName(); + authorize(cntxt.getHive().getDatabase(dbName), Privilege.SELECT); + } + + // TODO: add alter database support in HCat + + // Table operations. + + DropTableDesc dropTable = work.getDropTblDesc(); + if (dropTable != null) { + if (dropTable.getPartSpecs() == null) { + // drop table is already enforced by Hive. We only check for table level location even if the + // table is partitioned. 
+ } else { + //this is actually a ALTER TABLE DROP PARITITION statement + for (PartitionSpec partSpec : dropTable.getPartSpecs()) { + // partitions are not added as write entries in drop partitions in Hive + Table table = hive.getTable(hive.getCurrentDatabase(), dropTable.getTableName()); + List partitions = null; + try { + partitions = hive.getPartitionsByFilter(table, partSpec.toString()); + } catch (Exception e) { + throw new HiveException(e); + } + + for (Partition part : partitions) { + authorize(part, Privilege.DROP); + } + } + } + } + + AlterTableDesc alterTable = work.getAlterTblDesc(); + if (alterTable != null) { + Table table = hive.getTable(hive.getCurrentDatabase(), alterTable.getOldName(), false); + + Partition part = null; + if (alterTable.getPartSpec() != null) { + part = hive.getPartition(table, alterTable.getPartSpec(), false); + } + + String newLocation = alterTable.getNewLocation(); + + /* Hcat requires ALTER_DATA privileges for ALTER TABLE LOCATION statements + * for the old table/partition location and the new location. + */ + if (alterTable.getOp() == AlterTableDesc.AlterTableTypes.ALTERLOCATION) { + if (part != null) { + authorize(part, Privilege.ALTER_DATA); // authorize for the old + // location, and new location + part.setLocation(newLocation); + authorize(part, Privilege.ALTER_DATA); + } else { + authorize(table, Privilege.ALTER_DATA); // authorize for the old + // location, and new location + table.getTTable().getSd().setLocation(newLocation); + authorize(table, Privilege.ALTER_DATA); + } + } + //other alter operations are already supported by Hive + } + + // we should be careful when authorizing table based on just the + // table name. 
If columns have separate authorization domain, it + // must be honored + DescTableDesc descTable = work.getDescTblDesc(); + if (descTable != null) { + String tableName = extractTableName(descTable.getTableName()); + authorizeTable(cntxt.getHive(), tableName, Privilege.SELECT); + } + + ShowPartitionsDesc showParts = work.getShowPartsDesc(); + if (showParts != null) { + String tableName = extractTableName(showParts.getTabName()); + authorizeTable(cntxt.getHive(), tableName, Privilege.SELECT); + } + } +} diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/cli/SemanticAnalysis/HCatSemanticAnalyzerBase.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/cli/SemanticAnalysis/HCatSemanticAnalyzerBase.java new file mode 100644 index 0000000..14b9578 --- /dev/null +++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/cli/SemanticAnalysis/HCatSemanticAnalyzerBase.java @@ -0,0 +1,179 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+
+package org.apache.hive.hcatalog.cli.SemanticAnalysis;
+
+import java.io.Serializable;
+import java.util.List;
+
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.api.Database;
+import org.apache.hadoop.hive.ql.exec.Task;
+import org.apache.hadoop.hive.ql.metadata.AuthorizationException;
+import org.apache.hadoop.hive.ql.metadata.Hive;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.InvalidTableException;
+import org.apache.hadoop.hive.ql.metadata.Partition;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.parse.AbstractSemanticAnalyzerHook;
+import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContext;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.DDLWork;
+import org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider;
+import org.apache.hadoop.hive.ql.security.authorization.Privilege;
+import org.apache.hadoop.hive.ql.session.SessionState;
+
+/**
+ * Base class for HCatSemanticAnalyzer hooks; runs the HCat-specific DDL authorization after analysis.
+ */
+public class HCatSemanticAnalyzerBase extends AbstractSemanticAnalyzerHook {
+
+  private HiveAuthorizationProvider authProvider; // resolved lazily by getAuthProvider()
+
+  protected String getDbName(Hive hive, String dbName) {
+    return dbName == null ? hive.getCurrentDatabase() : dbName; // null means "current database"
+  }
+
+  public HiveAuthorizationProvider getAuthProvider() {
+    if (authProvider == null) { // first use: take the authorizer of the current session
+      authProvider = SessionState.get().getAuthorizer();
+    }
+
+    return authProvider;
+  }
+
+  @Override
+  public void postAnalyze(HiveSemanticAnalyzerHookContext context,
+      List<Task<? extends Serializable>> rootTasks) throws SemanticException {
+    super.postAnalyze(context, rootTasks);
+
+    //Authorize the operation.
+    authorizeDDL(context, rootTasks);
+  }
+
+  /**
+   * Checks for the given rootTasks, and calls authorizeDDLWork() for each DDLWork to
+   * be authorized. The hooks should override this, or authorizeDDLWork to perform the
+   * actual authorization. Returns without checking when HIVE_AUTHORIZATION_ENABLED is false.
+   */
+  /*
+  * Impl note: Hive provides authorization with its own model, and calls the defined
+  * HiveAuthorizationProvider from Driver.doAuthorization(). However, HCat has to
+  * do additional calls to the auth provider to implement expected behavior for
+  * StorageDelegationAuthorizationProvider. This means, that the defined auth provider
+  * is called by both Hive and HCat. The following are missing from Hive's implementation,
+  * and when they are fixed in Hive, we can remove the HCat-specific auth checks.
+  * 1. CREATE DATABASE/TABLE, ADD PARTITION statements do not call
+  * HiveAuthorizationProvider.authorize() with the candidate objects, which means that
+  * we cannot do checks against defined LOCATION.
+  * 2. HiveOperation does not define sufficient Privileges for most of the operations,
+  * especially database operations.
+  * 3. For some of the operations, Hive SemanticAnalyzer does not add the changed
+  * object as a WriteEntity or ReadEntity.
+  *
+  * @see https://issues.apache.org/jira/browse/HCATALOG-244
+  * @see https://issues.apache.org/jira/browse/HCATALOG-245
+  */
+  protected void authorizeDDL(HiveSemanticAnalyzerHookContext context,
+      List<Task<? extends Serializable>> rootTasks) throws SemanticException {
+
+    if (!HiveConf.getBoolVar(context.getConf(),
+        HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED)) {
+      return;
+    }
+
+    Hive hive;
+    try {
+      hive = context.getHive();
+
+      for (Task<? extends Serializable> task : rootTasks) {
+        if (task.getWork() instanceof DDLWork) { // only DDL work is authorized here
+          DDLWork work = (DDLWork) task.getWork();
+          if (work != null) {
+            authorizeDDLWork(context, hive, work);
+          }
+        }
+      }
+    } catch (SemanticException ex) {
+      throw ex;
+    } catch (AuthorizationException ex) {
+      throw ex;
+    } catch (Exception ex) { // anything else is reported as a semantic-analysis failure
+      throw new SemanticException(ex);
+    }
+  }
+
+  /**
+   * Authorizes the given DDLWork. Does nothing by default. Override this
+   * and delegate to the relevant method in HiveAuthorizationProvider obtained by
+   * getAuthProvider().
+   */
+  protected void authorizeDDLWork(HiveSemanticAnalyzerHookContext context,
+      Hive hive, DDLWork work) throws HiveException {
+  }
+
+  protected void authorize(Privilege[] inputPrivs, Privilege[] outputPrivs)
+    throws AuthorizationException, SemanticException {
+    try {
+      getAuthProvider().authorize(inputPrivs, outputPrivs);
+    } catch (HiveException ex) { // provider failures surface as SemanticException
+      throw new SemanticException(ex);
+    }
+  }
+
+  protected void authorize(Database db, Privilege priv)
+    throws AuthorizationException, SemanticException {
+    try {
+      getAuthProvider().authorize(db, null, new Privilege[]{priv}); // priv goes in the last slot
+    } catch (HiveException ex) {
+      throw new SemanticException(ex);
+    }
+  }
+
+  protected void authorizeTable(Hive hive, String tableName, Privilege priv)
+    throws AuthorizationException, HiveException {
+    Table table;
+    try {
+      table = hive.getTable(tableName);
+    } catch (InvalidTableException ite) {
+      // Table itself doesn't exist in metastore, nothing to validate.
+      return;
+    }
+
+    authorize(table, priv);
+  }
+
+  protected void authorize(Table table, Privilege priv)
+    throws AuthorizationException, SemanticException {
+    try {
+      getAuthProvider().authorize(table, new Privilege[]{priv}, null); // priv goes in the first slot
+    } catch (HiveException ex) {
+      throw new SemanticException(ex);
+    }
+  }
+
+  protected void authorize(Partition part, Privilege priv)
+    throws AuthorizationException, SemanticException {
+    try {
+      getAuthProvider().authorize(part, new Privilege[]{priv}, null); // same slot as for tables
+    } catch (HiveException ex) {
+      throw new SemanticException(ex);
+    }
+  }
+}
diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/ErrorType.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/ErrorType.java
new file mode 100644
index 0000000..ecc1d1b
--- /dev/null
+++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/ErrorType.java
@@ -0,0 +1,137 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.
See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hive.hcatalog.common;
+
+/**
+ * Enum type representing the various errors thrown by HCat, each with a numeric code and a message.
+ */
+public enum ErrorType {
+
+  /* HCat Input Format related errors 1000 - 1999 */
+  ERROR_DB_INIT (1000, "Error initializing database session"),
+  ERROR_EXCEED_MAXPART (1001, "Query result exceeded maximum number of partitions allowed"),
+
+  ERROR_SET_INPUT (1002, "Error setting input information"),
+
+  /* HCat Output Format related errors 2000 - 2999 */
+  ERROR_INVALID_TABLE (2000, "Table specified does not exist"),
+  ERROR_SET_OUTPUT (2001, "Error setting output information"),
+  ERROR_DUPLICATE_PARTITION (2002, "Partition already present with given partition key values"),
+  ERROR_NON_EMPTY_TABLE (2003, "Non-partitioned table already contains data"),
+  ERROR_NOT_INITIALIZED (2004, "HCatOutputFormat not initialized, setOutput has to be called"),
+  ERROR_INIT_STORAGE_HANDLER (2005, "Error initializing storage handler instance"),
+  ERROR_PUBLISHING_PARTITION (2006, "Error adding partition to metastore"),
+  ERROR_SCHEMA_COLUMN_MISMATCH (2007, "Invalid column position in partition schema"),
+  ERROR_SCHEMA_PARTITION_KEY (2008, "Partition key cannot be present in the partition data"),
+  ERROR_SCHEMA_TYPE_MISMATCH (2009, "Invalid column type in partition schema"),
+  ERROR_INVALID_PARTITION_VALUES (2010, "Invalid partition values specified"),
+  ERROR_MISSING_PARTITION_KEY (2011, "Partition key value not provided for publish"),
+  ERROR_MOVE_FAILED (2012, "Moving of data failed during commit"),
+  ERROR_TOO_MANY_DYNAMIC_PTNS (2013, "Attempt to create too many dynamic partitions"),
+  ERROR_INIT_LOADER (2014, "Error initializing Pig loader"),
+  ERROR_INIT_STORER (2015, "Error initializing Pig storer"),
+  ERROR_NOT_SUPPORTED (2016, "Error operation not supported"),
+
+  /* Authorization Errors 3000 - 3999 */
+  ERROR_ACCESS_CONTROL (3000, "Permission denied"),
+
+  /* Miscellaneous errors, range 9000 - 9998 */
+  ERROR_UNIMPLEMENTED (9000, "Functionality currently unimplemented"),
+  ERROR_INTERNAL_EXCEPTION (9001, "Exception occurred while processing HCat request");
+
+  /** The error code. */
+  private int errorCode;
+
+  /** The error message. */
+  private String errorMessage;
+
+  /** Should the causal exception message be appended to the error message, yes by default. */
+  private boolean appendCauseMessage = true;
+
+  /** Is this a retriable error, no by default. */
+  private boolean isRetriable = false;
+
+  /**
+   * Instantiates a new error type that appends the cause message and is not retriable.
+   * @param errorCode the error code
+   * @param errorMessage the error message
+   */
+  private ErrorType(int errorCode, String errorMessage) {
+    this.errorCode = errorCode;
+    this.errorMessage = errorMessage;
+  }
+
+  /**
+   * Instantiates a new error type that is not retriable.
+   * @param errorCode the error code
+   * @param errorMessage the error message
+   * @param appendCauseMessage should causal exception message be appended to error message
+   */
+  private ErrorType(int errorCode, String errorMessage, boolean appendCauseMessage) {
+    this.errorCode = errorCode;
+    this.errorMessage = errorMessage;
+    this.appendCauseMessage = appendCauseMessage;
+  }
+
+  /**
+   * Instantiates a new error type with every attribute given explicitly.
+   * @param errorCode the error code
+   * @param errorMessage the error message
+   * @param appendCauseMessage should causal exception message be appended to error message
+   * @param isRetriable is this a retriable error
+   */
+  private ErrorType(int errorCode, String errorMessage, boolean appendCauseMessage, boolean isRetriable) {
+    this.errorCode = errorCode;
+    this.errorMessage = errorMessage;
+    this.appendCauseMessage = appendCauseMessage;
+    this.isRetriable = isRetriable;
+  }
+
+  /**
+   * Gets the error code.
+   * @return the error code
+   */
+  public int getErrorCode() {
+    return errorCode;
+  }
+
+  /**
+   * Gets the error message.
+   * @return the error message
+   */
+  public String getErrorMessage() {
+    return errorMessage;
+  }
+
+  /**
+   * Checks if this is a retriable error.
+   * @return true, if is a retriable error, false otherwise
+   */
+  public boolean isRetriable() {
+    return isRetriable;
+  }
+
+  /**
+   * Whether the cause of the exception should be added to the error message.
+   * @return true, if the cause should be added to the message, false otherwise
+   */
+  public boolean appendCauseMessage() {
+    return appendCauseMessage;
+  }
+}
diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HCatConstants.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HCatConstants.java
new file mode 100644
index 0000000..56e8c05
--- /dev/null
+++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HCatConstants.java
@@ -0,0 +1,186 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hive.hcatalog.common;
+
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.mapred.SequenceFileInputFormat;
+import org.apache.hadoop.mapred.SequenceFileOutputFormat;
+
+public final class HCatConstants {
+
+  public static final String HIVE_RCFILE_IF_CLASS = "org.apache.hadoop.hive.ql.io.RCFileInputFormat";
+  public static final String HIVE_RCFILE_OF_CLASS = "org.apache.hadoop.hive.ql.io.RCFileOutputFormat";
+
+  public static final String SEQUENCEFILE_INPUT = SequenceFileInputFormat.class.getName();
+  public static final String SEQUENCEFILE_OUTPUT = SequenceFileOutputFormat.class.getName();
+
+  public static final String HCAT_PIG_STORAGE_CLASS = "org.apache.pig.builtin.PigStorage";
+  public static final String HCAT_PIG_LOADER = "hcat.pig.loader";
+  public static final String HCAT_PIG_LOADER_LOCATION_SET = HCAT_PIG_LOADER + ".location.set";
+  public static final String HCAT_PIG_LOADER_ARGS = "hcat.pig.loader.args";
+  public static final String HCAT_PIG_STORER = "hcat.pig.storer";
+  public static final String HCAT_PIG_STORER_ARGS = "hcat.pig.storer.args";
+  public static final String HCAT_PIG_ARGS_DELIMIT = "hcat.pig.args.delimiter";
+  public static final String HCAT_PIG_ARGS_DELIMIT_DEFAULT = ",";
+  public static final String HCAT_PIG_STORER_LOCATION_SET = HCAT_PIG_STORER + ".location.set";
+  public static final String HCAT_PIG_INNER_TUPLE_NAME = "hcat.pig.inner.tuple.name";
+  public static final String HCAT_PIG_INNER_TUPLE_NAME_DEFAULT = "innertuple";
+  public static final String HCAT_PIG_INNER_FIELD_NAME = "hcat.pig.inner.field.name";
+  public static final String HCAT_PIG_INNER_FIELD_NAME_DEFAULT = "innerfield";
+
+  /**
+   * {@value} (default: null)
+   * When the property is set in the UDFContext of the org.apache.hive.hcatalog.pig.HCatStorer, HCatStorer writes
+   * to the location it specifies instead of the default HCatalog location format. An example can be found
+   * in org.apache.hive.hcatalog.pig.HCatStorerWrapper.
+   */
+  public static final String HCAT_PIG_STORER_EXTERNAL_LOCATION = HCAT_PIG_STORER + ".external.location";
+
+  //The keys used to store info into the job Configuration
+  public static final String HCAT_KEY_BASE = "mapreduce.lib.hcat";
+
+  public static final String HCAT_KEY_OUTPUT_SCHEMA = HCAT_KEY_BASE + ".output.schema";
+
+  public static final String HCAT_KEY_JOB_INFO = HCAT_KEY_BASE + ".job.info";
+
+  // hcatalog specific configurations, that can be put in hive-site.xml
+  public static final String HCAT_HIVE_CLIENT_EXPIRY_TIME = "hcatalog.hive.client.cache.expiry.time";
+
+  private HCatConstants() { // restrict instantiation
+  }
+
+  public static final String HCAT_TABLE_SCHEMA = "hcat.table.schema";
+
+  public static final String HCAT_METASTORE_URI = HiveConf.ConfVars.METASTOREURIS.varname;
+
+  public static final String HCAT_PERMS = "hcat.perms";
+
+  public static final String HCAT_GROUP = "hcat.group";
+
+  public static final String HCAT_CREATE_TBL_NAME = "hcat.create.tbl.name";
+
+  public static final String HCAT_CREATE_DB_NAME = "hcat.create.db.name";
+
+  public static final String HCAT_METASTORE_PRINCIPAL
+    = HiveConf.ConfVars.METASTORE_KERBEROS_PRINCIPAL.varname;
+
+  /**
+   * The desired number of input splits produced for each partition. When the
+   * input files are large and few, we want to split them into many splits,
+   * so as to increase the parallelism of loading the splits. Try also two
+   * other parameters, mapred.min.split.size and mapred.max.split.size, to
+   * control the number of input splits.
+   */
+  public static final String HCAT_DESIRED_PARTITION_NUM_SPLITS =
+    "hcat.desired.partition.num.splits";
+
+  // IMPORTANT IMPORTANT IMPORTANT!!!!!
+  //The keys used to store info into the job Configuration.
+  //If any new keys are added, the HCatStorer needs to be updated. The HCatStorer
+  //updates the job configuration in the backend to insert these keys to avoid
+  //having to call setOutput from the backend (which would cause a metastore call
+  //from the map jobs)
+  public static final String HCAT_KEY_OUTPUT_BASE = "mapreduce.lib.hcatoutput";
+  public static final String HCAT_KEY_OUTPUT_INFO = HCAT_KEY_OUTPUT_BASE + ".info";
+  public static final String HCAT_KEY_HIVE_CONF = HCAT_KEY_OUTPUT_BASE + ".hive.conf";
+  public static final String HCAT_KEY_TOKEN_SIGNATURE = HCAT_KEY_OUTPUT_BASE + ".token.sig";
+
+  public static final String[] OUTPUT_CONFS_TO_SAVE = {
+    HCAT_KEY_OUTPUT_INFO,
+    HCAT_KEY_HIVE_CONF,
+    HCAT_KEY_TOKEN_SIGNATURE
+  };
+
+
+  public static final String HCAT_MSG_CLEAN_FREQ = "hcat.msg.clean.freq";
+  public static final String HCAT_MSG_EXPIRY_DURATION = "hcat.msg.expiry.duration";
+
+  public static final String HCAT_MSGBUS_TOPIC_NAME = "hcat.msgbus.topic.name";
+  public static final String HCAT_MSGBUS_TOPIC_NAMING_POLICY = "hcat.msgbus.topic.naming.policy";
+  public static final String HCAT_MSGBUS_TOPIC_PREFIX = "hcat.msgbus.topic.prefix";
+
+  public static final String HCAT_DYNAMIC_PTN_JOBID = HCAT_KEY_OUTPUT_BASE + "dynamic.jobid"; // NOTE(review): no "." before "dynamic", so the key is "mapreduce.lib.hcatoutputdynamic.jobid"; confirm nothing depends on it before changing
+  public static final boolean HCAT_IS_DYNAMIC_MAX_PTN_CHECK_ENABLED = false;
+
+  // Message Bus related properties.
+  public static final String HCAT_DEFAULT_TOPIC_PREFIX = "hcat";
+  public static final String HCAT_EVENT = "HCAT_EVENT";
+  public static final String HCAT_ADD_PARTITION_EVENT = "ADD_PARTITION";
+  public static final String HCAT_DROP_PARTITION_EVENT = "DROP_PARTITION";
+  public static final String HCAT_PARTITION_DONE_EVENT = "PARTITION_DONE";
+  public static final String HCAT_CREATE_TABLE_EVENT = "CREATE_TABLE";
+  public static final String HCAT_DROP_TABLE_EVENT = "DROP_TABLE";
+  public static final String HCAT_CREATE_DATABASE_EVENT = "CREATE_DATABASE";
+  public static final String HCAT_DROP_DATABASE_EVENT = "DROP_DATABASE";
+  public static final String HCAT_MESSAGE_VERSION = "HCAT_MESSAGE_VERSION";
+  public static final String HCAT_MESSAGE_FORMAT = "HCAT_MESSAGE_FORMAT";
+  public static final String CONF_LABEL_HCAT_MESSAGE_FACTORY_IMPL_PREFIX = "hcatalog.message.factory.impl.";
+  public static final String CONF_LABEL_HCAT_MESSAGE_FORMAT = "hcatalog.message.format";
+  public static final String DEFAULT_MESSAGE_FACTORY_IMPL = "org.apache.hive.hcatalog.messaging.json.JSONMessageFactory";
+
+  // System environment variables
+  public static final String SYSENV_HADOOP_TOKEN_FILE_LOCATION = "HADOOP_TOKEN_FILE_LOCATION";
+
+  // Hadoop Conf Var Names
+  public static final String CONF_MAPREDUCE_JOB_CREDENTIALS_BINARY = "mapreduce.job.credentials.binary";
+
+  //***************************************************************************
+  // Data-related configuration properties.
+  //***************************************************************************
+
+  /**
+   * {@value} (default: {@value #HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER_DEFAULT}).
+   * Pig < 0.10.0 does not have boolean support, and scripts written for pre-boolean Pig versions
+   * will not expect boolean values when upgrading Pig. For integration the option is offered to
+   * convert boolean fields to integers by setting this Hadoop configuration key.
+   */
+  public static final String HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER =
+    "hcat.data.convert.boolean.to.integer";
+  public static final boolean HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER_DEFAULT = false;
+
+  /**
+   * {@value} (default: {@value #HCAT_DATA_TINY_SMALL_INT_PROMOTION_DEFAULT}).
+   * Hive tables support tinyint and smallint columns, while not all processing frameworks support
+   * these types (Pig only has integer for example). Enable this property to promote tinyint and
+   * smallint columns to integer at runtime. Note that writes to tinyint and smallint columns
+   * enforce bounds checking and jobs will fail if attempting to write values outside the column
+   * bounds.
+   */
+  public static final String HCAT_DATA_TINY_SMALL_INT_PROMOTION =
+    "hcat.data.tiny.small.int.promotion";
+  public static final boolean HCAT_DATA_TINY_SMALL_INT_PROMOTION_DEFAULT = false;
+
+  /**
+   * {@value} (default: {@value #HCAT_INPUT_BAD_RECORD_THRESHOLD_DEFAULT}).
+   * Threshold for the ratio of bad records that will be silently skipped without causing a task
+   * failure. This is useful when processing large data sets with corrupt records, when it's
+   * acceptable to skip some bad records.
+   */
+  public static final String HCAT_INPUT_BAD_RECORD_THRESHOLD_KEY = "hcat.input.bad.record.threshold";
+  public static final float HCAT_INPUT_BAD_RECORD_THRESHOLD_DEFAULT = 0.0001f;
+
+  /**
+   * {@value} (default: {@value #HCAT_INPUT_BAD_RECORD_MIN_DEFAULT}).
+   * Number of bad records that will be accepted before applying
+   * {@value #HCAT_INPUT_BAD_RECORD_THRESHOLD_KEY}. This is necessary to prevent an initial bad
+   * record from causing a task failure.
+   */
+  public static final String HCAT_INPUT_BAD_RECORD_MIN_KEY = "hcat.input.bad.record.min";
+  public static final int HCAT_INPUT_BAD_RECORD_MIN_DEFAULT = 2;
+}
diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HCatContext.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HCatContext.java
new file mode 100644
index 0000000..aa3b5fe
--- /dev/null
+++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HCatContext.java
@@ -0,0 +1,87 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hive.hcatalog.common;
+
+import com.google.common.base.Optional;
+import com.google.common.base.Preconditions;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.common.classification.InterfaceAudience;
+import org.apache.hadoop.hive.common.classification.InterfaceStability;
+
+import java.util.Map;
+
+/**
+ * HCatContext is a singleton that provides global access to configuration data.
+ *
+

* <p>HCatalog provides a variety of functionality that users can configure at runtime through
+ * configuration properties. Available configuration properties are defined in
+ * {@link HCatConstants}. HCatContext allows users to enable optional functionality by
+ * setting properties in a provided configuration.</p>
+ *
+ * <p>HCatalog users (MR apps, processing framework adapters) should set properties
+ * in a configuration that has been provided to
+ * {@link #setConf(org.apache.hadoop.conf.Configuration)} to enable optional functionality.
+ * The job configuration must be used to ensure properties are passed to the backend MR tasks.</p>
+ *
+ * <p>HCatalog developers should enable optional functionality by checking properties
+ * from {@link #getConf()}. Since users are not obligated to set a configuration, optional
+ * functionality must provide a sensible default.</p>
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
+public enum HCatContext {
+  INSTANCE;
+
+  private Configuration conf = null; // stays null until setConf() is first called
+
+  /**
+   * Use the given configuration for optional behavior. Keys exclusive to an existing config
+   * are set in the new conf. The job conf must be used to ensure properties are passed to
+   * backend MR tasks. Returns this instance; a null newConf is rejected by checkNotNull.
+   */
+  public synchronized HCatContext setConf(Configuration newConf) {
+    Preconditions.checkNotNull(newConf, "Required parameter 'newConf' must not be null.");
+
+    if (conf == null) { // first registration: nothing to carry over
+      conf = newConf;
+      return this;
+    }
+
+    if (conf != newConf) {
+      for (Map.Entry<String, String> entry : conf) { // scan the previously registered conf
+        if ((entry.getKey().matches("hcat.*")) && (newConf.get(entry.getKey()) == null)) {
+          newConf.set(entry.getKey(), entry.getValue()); // copy keys starting with "hcat" that newConf lacks
+        }
+      }
+      conf = newConf;
+    }
+    return this;
+  }
+
+  /**
+   * Get the configuration, if there is one. Users are not required to setup HCatContext
+   * unless they wish to override default behavior, so the configuration may not be present.
+   *
+   * @return an Optional that might contain a Configuration
+   */
+  public Optional<Configuration> getConf() {
+    return Optional.fromNullable(conf);
+  }
+}
diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HCatException.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HCatException.java
new file mode 100644
index 0000000..77e8abc
--- /dev/null
+++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HCatException.java
@@ -0,0 +1,159 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hive.hcatalog.common;
+
+import java.io.IOException;
+
+/**
+ * Class representing exceptions thrown by HCat; every instance carries an {@link ErrorType}.
+ */
+public class HCatException extends IOException {
+
+  private static final long serialVersionUID = 1L;
+
+  /** The error type enum for this exception. */
+  private final ErrorType errorType;
+
+  /**
+   * Instantiates a new hcat exception.
+   * @param errorType the error type
+   */
+  public HCatException(ErrorType errorType) {
+    this(errorType, null, null);
+  }
+
+
+  /**
+   * Instantiates a new hcat exception.
+   * @param errorType the error type
+   * @param cause the cause
+   */
+  public HCatException(ErrorType errorType, Throwable cause) {
+    this(errorType, null, cause);
+  }
+
+  /**
+   * Instantiates a new hcat exception.
+   * @param errorType the error type
+   * @param extraMessage extra messages to add to the message string
+   */
+  public HCatException(ErrorType errorType, String extraMessage) {
+    this(errorType, extraMessage, null);
+  }
+
+  /**
+   * Instantiates a new hcat exception.
+   * @param errorType the error type
+   * @param extraMessage extra messages to add to the message string
+   * @param cause the cause
+   */
+  public HCatException(ErrorType errorType, String extraMessage, Throwable cause) {
+    super(buildErrorMessage(
+      errorType,
+      extraMessage,
+      cause), cause);
+    this.errorType = errorType;
+  }
+
+
+  //TODO : remove default error type constructors after all exceptions
+  //are changed to use error types
+
+  /**
+   * Instantiates a new hcat exception of type ERROR_INTERNAL_EXCEPTION.
+   * @param message the error message
+   */
+  public HCatException(String message) {
+    this(ErrorType.ERROR_INTERNAL_EXCEPTION, message, null);
+  }
+
+  /**
+   * Instantiates a new hcat exception of type ERROR_INTERNAL_EXCEPTION.
+   * @param message the error message
+   * @param cause the cause
+   */
+  public HCatException(String message, Throwable cause) {
+    this(ErrorType.ERROR_INTERNAL_EXCEPTION, message, cause);
+  }
+
+
+  /**
+   * Builds the error message string. The error type message is appended with the extra message. If appendCause
+   * is true for the error type, then the message of the cause also is added to the message.
+   * @param type the error type
+   * @param extraMessage the extra message string
+   * @param cause the cause for the exception
+   * @return the exception message string
+   */
+  public static String buildErrorMessage(ErrorType type, String extraMessage, Throwable cause) {
+
+    //Message starts with this class name, followed by the error code and the error type message
+    StringBuilder message = new StringBuilder(HCatException.class.getName());
+    message.append(" : ").append(type.getErrorCode());
+    message.append(" : ").append(type.getErrorMessage());
+
+    if (extraMessage != null) {
+      //Add the extra message value to buffer
+      message.append(" : ").append(extraMessage);
+    }
+
+    if (type.appendCauseMessage()) {
+      if (cause != null) {
+        //Add the cause (its toString() form) to buffer
+        message.append(". Cause : ").append(cause.toString());
+      }
+    }
+
+    return message.toString();
+  }
+
+
+  /**
+   * Is this a retriable error.
+   * @return is it retriable
+   */
+  public boolean isRetriable() {
+    return errorType.isRetriable();
+  }
+
+  /**
+   * Gets the error type.
+   * @return the error type enum
+   */
+  public ErrorType getErrorType() {
+    return errorType;
+  }
+
+  /**
+   * Gets the error code.
+   * @return the error code
+   */
+  public int getErrorCode() {
+    return errorType.getErrorCode();
+  }
+
+  /* (non-Javadoc)
+   * @see java.lang.Throwable#toString()
+   */
+  @Override
+  public String toString() {
+    return getMessage();
+  }
+
+}
diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HCatUtil.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HCatUtil.java
new file mode 100644
index 0000000..841857a
--- /dev/null
+++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HCatUtil.java
@@ -0,0 +1,627 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */ + +package org.apache.hive.hcatalog.common; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; +import java.io.Serializable; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Properties; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.permission.FsAction; +import org.apache.hadoop.hive.common.JavaUtils; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; +import org.apache.hadoop.hive.metastore.MetaStoreUtils; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; +import org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat; +import org.apache.hadoop.hive.ql.metadata.Partition; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.plan.TableDesc; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.hive.thrift.DelegationTokenIdentifier; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapred.JobClient; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapreduce.JobContext; +import org.apache.hadoop.security.token.Token; +import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenIdentifier; +import org.apache.hadoop.util.ReflectionUtils; +import org.apache.hive.hcatalog.data.Pair; +import org.apache.hive.hcatalog.data.schema.HCatFieldSchema; +import org.apache.hive.hcatalog.data.schema.HCatSchema; +import 
org.apache.hive.hcatalog.data.schema.HCatSchemaUtils; +import org.apache.hive.hcatalog.mapreduce.FosterStorageHandler; +import org.apache.hive.hcatalog.mapreduce.HCatOutputFormat; +import org.apache.hive.hcatalog.mapreduce.HCatStorageHandler; +import org.apache.hive.hcatalog.mapreduce.InputJobInfo; +import org.apache.hive.hcatalog.mapreduce.OutputJobInfo; +import org.apache.hive.hcatalog.mapreduce.PartInfo; +import org.apache.hive.hcatalog.mapreduce.StorerInfo; +import org.apache.thrift.TException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.security.auth.login.LoginException; + +public class HCatUtil { + + private static final Logger LOG = LoggerFactory.getLogger(HCatUtil.class); + private static volatile HiveClientCache hiveClientCache; + private final static int DEFAULT_HIVE_CACHE_EXPIRY_TIME_SECONDS = 2 * 60; + + public static boolean checkJobContextIfRunningFromBackend(JobContext j) { + if (j.getConfiguration().get("mapred.task.id", "").equals("") && + !("true".equals(j.getConfiguration().get("pig.illustrating")))) { + return false; + } + return true; + } + + public static String serialize(Serializable obj) throws IOException { + if (obj == null) { + return ""; + } + try { + ByteArrayOutputStream serialObj = new ByteArrayOutputStream(); + ObjectOutputStream objStream = new ObjectOutputStream(serialObj); + objStream.writeObject(obj); + objStream.close(); + return encodeBytes(serialObj.toByteArray()); + } catch (Exception e) { + throw new IOException("Serialization error: " + e.getMessage(), e); + } + } + + public static Object deserialize(String str) throws IOException { + if (str == null || str.length() == 0) { + return null; + } + try { + ByteArrayInputStream serialObj = new ByteArrayInputStream( + decodeBytes(str)); + ObjectInputStream objStream = new ObjectInputStream(serialObj); + return objStream.readObject(); + } catch (Exception e) { + throw new IOException("Deserialization error: " + e.getMessage(), e); + } + } + + public 
static String encodeBytes(byte[] bytes) { + StringBuffer strBuf = new StringBuffer(); + + for (int i = 0; i < bytes.length; i++) { + strBuf.append((char) (((bytes[i] >> 4) & 0xF) + ('a'))); + strBuf.append((char) (((bytes[i]) & 0xF) + ('a'))); + } + + return strBuf.toString(); + } + + public static byte[] decodeBytes(String str) { + byte[] bytes = new byte[str.length() / 2]; + for (int i = 0; i < str.length(); i += 2) { + char c = str.charAt(i); + bytes[i / 2] = (byte) ((c - 'a') << 4); + c = str.charAt(i + 1); + bytes[i / 2] += (c - 'a'); + } + return bytes; + } + + public static List getHCatFieldSchemaList( + FieldSchema... fields) throws HCatException { + List result = new ArrayList( + fields.length); + + for (FieldSchema f : fields) { + result.add(HCatSchemaUtils.getHCatFieldSchema(f)); + } + + return result; + } + + public static List getHCatFieldSchemaList( + List fields) throws HCatException { + if (fields == null) { + return null; + } else { + List result = new ArrayList(); + for (FieldSchema f : fields) { + result.add(HCatSchemaUtils.getHCatFieldSchema(f)); + } + return result; + } + } + + public static HCatSchema extractSchema(Table table) throws HCatException { + return new HCatSchema(HCatUtil.getHCatFieldSchemaList(table.getCols())); + } + + public static HCatSchema extractSchema(Partition partition) throws HCatException { + return new HCatSchema(HCatUtil.getHCatFieldSchemaList(partition.getCols())); + } + + public static List getFieldSchemaList( + List hcatFields) { + if (hcatFields == null) { + return null; + } else { + List result = new ArrayList(); + for (HCatFieldSchema f : hcatFields) { + result.add(HCatSchemaUtils.getFieldSchema(f)); + } + return result; + } + } + + public static Table getTable(HiveMetaStoreClient client, String dbName, String tableName) + throws NoSuchObjectException, TException, MetaException { + return new Table(client.getTable(dbName, tableName)); + } + + public static HCatSchema getTableSchemaWithPtnCols(Table table) throws 
IOException { + HCatSchema tableSchema = new HCatSchema(HCatUtil.getHCatFieldSchemaList(table.getCols())); + + if (table.getPartitionKeys().size() != 0) { + + // add partition keys to table schema + // NOTE : this assumes that we do not ever have ptn keys as columns + // inside the table schema as well! + for (FieldSchema fs : table.getPartitionKeys()) { + tableSchema.append(HCatSchemaUtils.getHCatFieldSchema(fs)); + } + } + return tableSchema; + } + + /** + * return the partition columns from a table instance + * + * @param table the instance to extract partition columns from + * @return HCatSchema instance which contains the partition columns + * @throws IOException + */ + public static HCatSchema getPartitionColumns(Table table) throws IOException { + HCatSchema cols = new HCatSchema(new LinkedList()); + if (table.getPartitionKeys().size() != 0) { + for (FieldSchema fs : table.getPartitionKeys()) { + cols.append(HCatSchemaUtils.getHCatFieldSchema(fs)); + } + } + return cols; + } + + /** + * Validate partition schema, checks if the column types match between the + * partition and the existing table schema. Returns the list of columns + * present in the partition but not in the table. + * + * @param table the table + * @param partitionSchema the partition schema + * @return the list of newly added fields + * @throws IOException Signals that an I/O exception has occurred. 
+ */ + public static List validatePartitionSchema(Table table, + HCatSchema partitionSchema) throws IOException { + Map partitionKeyMap = new HashMap(); + + for (FieldSchema field : table.getPartitionKeys()) { + partitionKeyMap.put(field.getName().toLowerCase(), field); + } + + List tableCols = table.getCols(); + List newFields = new ArrayList(); + + for (int i = 0; i < partitionSchema.getFields().size(); i++) { + + FieldSchema field = HCatSchemaUtils.getFieldSchema(partitionSchema + .getFields().get(i)); + + FieldSchema tableField; + if (i < tableCols.size()) { + tableField = tableCols.get(i); + + if (!tableField.getName().equalsIgnoreCase(field.getName())) { + throw new HCatException( + ErrorType.ERROR_SCHEMA_COLUMN_MISMATCH, + "Expected column <" + tableField.getName() + + "> at position " + (i + 1) + + ", found column <" + field.getName() + + ">"); + } + } else { + tableField = partitionKeyMap.get(field.getName().toLowerCase()); + + if (tableField != null) { + throw new HCatException( + ErrorType.ERROR_SCHEMA_PARTITION_KEY, "Key <" + + field.getName() + ">"); + } + } + + if (tableField == null) { + // field present in partition but not in table + newFields.add(field); + } else { + // field present in both. validate type has not changed + TypeInfo partitionType = TypeInfoUtils + .getTypeInfoFromTypeString(field.getType()); + TypeInfo tableType = TypeInfoUtils + .getTypeInfoFromTypeString(tableField.getType()); + + if (!partitionType.equals(tableType)) { + throw new HCatException( + ErrorType.ERROR_SCHEMA_TYPE_MISMATCH, "Column <" + + field.getName() + ">, expected <" + + tableType.getTypeName() + ">, got <" + + partitionType.getTypeName() + ">"); + } + } + } + + return newFields; + } + + /** + * Test if the first FsAction is more permissive than the second. This is + * useful in cases where we want to ensure that a file owner has more + * permissions than the group they belong to, for eg. 
More completely(but + * potentially more cryptically) owner-r >= group-r >= world-r : bitwise + * and-masked with 0444 => 444 >= 440 >= 400 >= 000 owner-w >= group-w >= + * world-w : bitwise and-masked with &0222 => 222 >= 220 >= 200 >= 000 + * owner-x >= group-x >= world-x : bitwise and-masked with &0111 => 111 >= + * 110 >= 100 >= 000 + * + * @return true if first FsAction is more permissive than the second, false + * if not. + */ + public static boolean validateMorePermissive(FsAction first, FsAction second) { + if ((first == FsAction.ALL) || (second == FsAction.NONE) + || (first == second)) { + return true; + } + switch (first) { + case READ_EXECUTE: + return ((second == FsAction.READ) || (second == FsAction.EXECUTE)); + case READ_WRITE: + return ((second == FsAction.READ) || (second == FsAction.WRITE)); + case WRITE_EXECUTE: + return ((second == FsAction.WRITE) || (second == FsAction.EXECUTE)); + } + return false; + } + + /** + * Ensure that read or write permissions are not granted without also + * granting execute permissions. 
Essentially, r-- , rw- and -w- are invalid, + * r-x, -wx, rwx, ---, --x are valid + * + * @param perms The FsAction to verify + * @return true if the presence of read or write permission is accompanied + * by execute permissions + */ + public static boolean validateExecuteBitPresentIfReadOrWrite(FsAction perms) { + if ((perms == FsAction.READ) || (perms == FsAction.WRITE) + || (perms == FsAction.READ_WRITE)) { + return false; + } + return true; + } + + public static Token getJobTrackerDelegationToken( + Configuration conf, String userName) throws Exception { + // LOG.info("getJobTrackerDelegationToken("+conf+","+userName+")"); + JobClient jcl = new JobClient(new JobConf(conf, HCatOutputFormat.class)); + Token t = jcl + .getDelegationToken(new Text(userName)); + // LOG.info("got "+t); + return t; + + // return null; + } + + public static Token extractThriftToken( + String tokenStrForm, String tokenSignature) throws MetaException, + TException, IOException { + // LOG.info("extractThriftToken("+tokenStrForm+","+tokenSignature+")"); + Token t = new Token(); + t.decodeFromUrlString(tokenStrForm); + t.setService(new Text(tokenSignature)); + // LOG.info("returning "+t); + return t; + } + + /** + * Create an instance of a storage handler defined in storerInfo. If one cannot be found + * then FosterStorageHandler is used to encapsulate the InputFormat, OutputFormat and SerDe. + * This StorageHandler assumes the other supplied storage artifacts are for a file-based storage system. 
+ * @param conf job's configuration will be used to configure the Configurable StorageHandler + * @param storerInfo StorerInfo to definining the StorageHandler and InputFormat, OutputFormat and SerDe + * @return storageHandler instance + * @throws IOException + */ + public static HCatStorageHandler getStorageHandler(Configuration conf, StorerInfo storerInfo) throws IOException { + return getStorageHandler(conf, + storerInfo.getStorageHandlerClass(), + storerInfo.getSerdeClass(), + storerInfo.getIfClass(), + storerInfo.getOfClass()); + } + + public static HCatStorageHandler getStorageHandler(Configuration conf, PartInfo partitionInfo) throws IOException { + return HCatUtil.getStorageHandler( + conf, + partitionInfo.getStorageHandlerClassName(), + partitionInfo.getSerdeClassName(), + partitionInfo.getInputFormatClassName(), + partitionInfo.getOutputFormatClassName()); + } + + /** + * Create an instance of a storage handler. If storageHandler == null, + * then surrrogate StorageHandler is used to encapsulate the InputFormat, OutputFormat and SerDe. + * This StorageHandler assumes the other supplied storage artifacts are for a file-based storage system. 
+ * @param conf job's configuration will be used to configure the Configurable StorageHandler + * @param storageHandler fully qualified class name of the desired StorageHandle instance + * @param serDe fully qualified class name of the desired SerDe instance + * @param inputFormat fully qualified class name of the desired InputFormat instance + * @param outputFormat fully qualified class name of the desired outputFormat instance + * @return storageHandler instance + * @throws IOException + */ + public static HCatStorageHandler getStorageHandler(Configuration conf, + String storageHandler, + String serDe, + String inputFormat, + String outputFormat) + throws IOException { + + if ((storageHandler == null) || (storageHandler.equals(FosterStorageHandler.class.getName()))) { + try { + FosterStorageHandler fosterStorageHandler = + new FosterStorageHandler(inputFormat, outputFormat, serDe); + fosterStorageHandler.setConf(conf); + return fosterStorageHandler; + } catch (ClassNotFoundException e) { + throw new IOException("Failed to load " + + "foster storage handler", e); + } + } + + try { + Class handlerClass = + (Class) Class + .forName(storageHandler, true, JavaUtils.getClassLoader()); + return (HCatStorageHandler) ReflectionUtils.newInstance( + handlerClass, conf); + } catch (ClassNotFoundException e) { + throw new IOException("Error in loading storage handler." + + e.getMessage(), e); + } + } + + public static Pair getDbAndTableName(String tableName) throws IOException { + String[] dbTableNametokens = tableName.split("\\."); + if (dbTableNametokens.length == 1) { + return new Pair(MetaStoreUtils.DEFAULT_DATABASE_NAME, tableName); + } else if (dbTableNametokens.length == 2) { + return new Pair(dbTableNametokens[0], dbTableNametokens[1]); + } else { + throw new IOException("tableName expected in the form " + + ".
or
. Got " + tableName); + } + } + + public static Map + getInputJobProperties(HCatStorageHandler storageHandler, + InputJobInfo inputJobInfo) { + TableDesc tableDesc = new TableDesc(storageHandler.getSerDeClass(), + storageHandler.getInputFormatClass(), + storageHandler.getOutputFormatClass(), + inputJobInfo.getTableInfo().getStorerInfo().getProperties()); + if (tableDesc.getJobProperties() == null) { + tableDesc.setJobProperties(new HashMap()); + } + + Map jobProperties = new HashMap(); + try { + tableDesc.getJobProperties().put( + HCatConstants.HCAT_KEY_JOB_INFO, + HCatUtil.serialize(inputJobInfo)); + + storageHandler.configureInputJobProperties(tableDesc, + jobProperties); + + } catch (IOException e) { + throw new IllegalStateException( + "Failed to configure StorageHandler", e); + } + + return jobProperties; + } + + @InterfaceAudience.Private + @InterfaceStability.Evolving + public static void + configureOutputStorageHandler(HCatStorageHandler storageHandler, + Configuration conf, + OutputJobInfo outputJobInfo) { + //TODO replace IgnoreKeyTextOutputFormat with a + //HiveOutputFormatWrapper in StorageHandler + TableDesc tableDesc = new TableDesc(storageHandler.getSerDeClass(), + storageHandler.getInputFormatClass(), + IgnoreKeyTextOutputFormat.class, + outputJobInfo.getTableInfo().getStorerInfo().getProperties()); + if (tableDesc.getJobProperties() == null) + tableDesc.setJobProperties(new HashMap()); + for (Map.Entry el : conf) { + tableDesc.getJobProperties().put(el.getKey(), el.getValue()); + } + + Map jobProperties = new HashMap(); + try { + tableDesc.getJobProperties().put( + HCatConstants.HCAT_KEY_OUTPUT_INFO, + HCatUtil.serialize(outputJobInfo)); + + storageHandler.configureOutputJobProperties(tableDesc, + jobProperties); + + for (Map.Entry el : jobProperties.entrySet()) { + conf.set(el.getKey(), el.getValue()); + } + } catch (IOException e) { + throw new IllegalStateException( + "Failed to configure StorageHandler", e); + } + } + + /** + * Replace the 
contents of dest with the contents of src + * @param src + * @param dest + */ + public static void copyConf(Configuration src, Configuration dest) { + dest.clear(); + for (Map.Entry el : src) { + dest.set(el.getKey(), el.getValue()); + } + } + + /** + * Get or create a hive client depending on whether it exits in cache or not + * @param hiveConf The hive configuration + * @return the client + * @throws MetaException When HiveMetaStoreClient couldn't be created + * @throws IOException + */ + public static HiveMetaStoreClient getHiveClient(HiveConf hiveConf) + throws MetaException, IOException { + + // Singleton behaviour: create the cache instance if required. The cache needs to be created lazily and + // using the expiry time available in hiveConf. + + if (hiveClientCache == null) { + synchronized (HiveMetaStoreClient.class) { + if (hiveClientCache == null) { + hiveClientCache = new HiveClientCache(hiveConf.getInt(HCatConstants.HCAT_HIVE_CLIENT_EXPIRY_TIME, + DEFAULT_HIVE_CACHE_EXPIRY_TIME_SECONDS)); + } + } + } + try { + return hiveClientCache.get(hiveConf); + } catch (LoginException e) { + throw new IOException("Couldn't create hiveMetaStoreClient, Error getting UGI for user", e); + } + } + + public static void closeHiveClientQuietly(HiveMetaStoreClient client) { + try { + if (client != null) + client.close(); + } catch (Exception e) { + LOG.debug("Error closing metastore client. 
Ignored the error.", e); + } + } + + public static HiveConf getHiveConf(Configuration conf) + throws IOException { + + HiveConf hiveConf = new HiveConf(conf, HCatUtil.class); + + //copy the hive conf into the job conf and restore it + //in the backend context + if (conf.get(HCatConstants.HCAT_KEY_HIVE_CONF) == null) { + conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, + HCatUtil.serialize(hiveConf.getAllProperties())); + } else { + //Copy configuration properties into the hive conf + Properties properties = (Properties) HCatUtil.deserialize( + conf.get(HCatConstants.HCAT_KEY_HIVE_CONF)); + + for (Map.Entry prop : properties.entrySet()) { + if (prop.getValue() instanceof String) { + hiveConf.set((String) prop.getKey(), (String) prop.getValue()); + } else if (prop.getValue() instanceof Integer) { + hiveConf.setInt((String) prop.getKey(), + (Integer) prop.getValue()); + } else if (prop.getValue() instanceof Boolean) { + hiveConf.setBoolean((String) prop.getKey(), + (Boolean) prop.getValue()); + } else if (prop.getValue() instanceof Long) { + hiveConf.setLong((String) prop.getKey(), (Long) prop.getValue()); + } else if (prop.getValue() instanceof Float) { + hiveConf.setFloat((String) prop.getKey(), + (Float) prop.getValue()); + } + } + } + + if (conf.get(HCatConstants.HCAT_KEY_TOKEN_SIGNATURE) != null) { + hiveConf.set("hive.metastore.token.signature", + conf.get(HCatConstants.HCAT_KEY_TOKEN_SIGNATURE)); + } + + return hiveConf; + } + + + public static JobConf getJobConfFromContext(JobContext jobContext) { + JobConf jobConf; + // we need to convert the jobContext into a jobConf + // 0.18 jobConf (Hive) vs 0.20+ jobContext (HCat) + // begin conversion.. 
+ jobConf = new JobConf(jobContext.getConfiguration()); + // ..end of conversion + + + return jobConf; + } + + public static void copyJobPropertiesToJobConf( + Map jobProperties, JobConf jobConf) { + for (Map.Entry entry : jobProperties.entrySet()) { + jobConf.set(entry.getKey(), entry.getValue()); + } + } + + + public static boolean isHadoop23() { + String version = org.apache.hadoop.util.VersionInfo.getVersion(); + if (version.matches("\\b0\\.23\\..+\\b")||version.matches("\\b2\\..*")) + return true; + return false; + } +} diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HiveClientCache.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HiveClientCache.java new file mode 100644 index 0000000..3c9a86b --- /dev/null +++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HiveClientCache.java @@ -0,0 +1,337 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+package org.apache.hive.hcatalog.common;
+
+import com.google.common.cache.Cache;
+import com.google.common.cache.CacheBuilder;
+import com.google.common.cache.RemovalListener;
+import com.google.common.cache.RemovalNotification;
+import org.apache.commons.lang.builder.EqualsBuilder;
+import org.apache.commons.lang.builder.HashCodeBuilder;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
+import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
+import org.apache.hadoop.hive.shims.ShimLoader;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.thrift.TException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import javax.security.auth.login.LoginException;
+import java.io.IOException;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ConcurrentMap;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
+
+/**
+ * A thread safe time expired cache for HiveMetaStoreClient
+ */
+class HiveClientCache {
+    final private Cache<HiveClientCacheKey, CacheableHiveMetaStoreClient> hiveCache;
+    private static final Logger LOG = LoggerFactory.getLogger(HiveClientCache.class);
+    private final int timeout;
+    // This lock is used to make sure removalListener won't close a client that is being contemplated for returning by get()
+    private final Object CACHE_TEARDOWN_LOCK = new Object();
+
+    private static final AtomicInteger nextId = new AtomicInteger(0);
+
+    // Since HiveMetaStoreClient is not threadsafe, hive clients are not shared across threads.
+    // Thread local variable containing each thread's unique ID, is used as one of the keys for the cache
+    // causing each thread to get a different client even if the hiveConf is same.
+    private static final ThreadLocal<Integer> threadId =
+        new ThreadLocal<Integer>() {
+            @Override
+            protected Integer initialValue() {
+                return nextId.getAndIncrement();
+            }
+        };
+    /** Returns the id of the calling thread; ids are taken from nextId the first time a thread asks. */
+    private int getThreadId() {
+        return threadId.get();
+    }
+
+    /**
+     * @param timeout the length of time in seconds after a client is created that it should be automatically removed
+     */
+    public HiveClientCache(final int timeout) {
+        this.timeout = timeout;
+        RemovalListener<HiveClientCacheKey, CacheableHiveMetaStoreClient> removalListener =
+            new RemovalListener<HiveClientCacheKey, CacheableHiveMetaStoreClient>() {
+                public void onRemoval(RemovalNotification<HiveClientCacheKey, CacheableHiveMetaStoreClient> notification) {
+                    CacheableHiveMetaStoreClient hiveMetaStoreClient = notification.getValue();
+                    if (hiveMetaStoreClient != null) {
+                        synchronized (CACHE_TEARDOWN_LOCK) {
+                            hiveMetaStoreClient.setExpiredFromCache();
+                            hiveMetaStoreClient.tearDownIfUnused();
+                        }
+                    }
+                }
+            };
+        hiveCache = CacheBuilder.newBuilder()
+            .expireAfterWrite(timeout, TimeUnit.SECONDS)
+            .removalListener(removalListener)
+            .build();
+
+        // Add a shutdown hook for cleanup, if there are elements remaining in the cache which were not cleaned up.
+        // This is the best effort approach. Ignore any error while doing so. Notice that most of the clients
+        // would get cleaned up via either the removalListener or the close() call, only the active clients
+        // that are in the cache or expired but being used in other threads won't get cleaned. The following code will only
+        // clean the active cache ones. The ones expired from cache but being held by other threads are at the mercy
+        // of finalize() being called.
+        Thread cleanupHiveClientShutdownThread = new Thread() {
+            @Override
+            public void run() {
+                LOG.debug("Cleaning up hive client cache in ShutDown hook");
+                closeAllClientsQuietly();
+            }
+        };
+        Runtime.getRuntime().addShutdownHook(cleanupHiveClientShutdownThread);
+    }
+
+    /**
+     * Note: This doesn't check if they are being used or not, meant only to be called during shutdown etc.
+     */
+    void closeAllClientsQuietly() {
+        try {
+            ConcurrentMap<HiveClientCacheKey, CacheableHiveMetaStoreClient> elements = hiveCache.asMap();
+            for (CacheableHiveMetaStoreClient cacheableHiveMetaStoreClient : elements.values()) {
+                cacheableHiveMetaStoreClient.tearDown();
+            }
+        } catch (Exception e) {
+            LOG.warn("Clean up of hive clients in the cache failed. Ignored", e);
+        }
+    }
+    /** Delegates to the underlying cache's cleanUp(). */
+    public void cleanup() {
+        hiveCache.cleanUp();
+    }
+
+    /**
+     * Returns a cached client if exists or else creates one, caches and returns it. It also checks that the client is
+     * healthy and can be reused; an unhealthy client is invalidated, closed and replaced by a fresh one.
+     * @param hiveConf configuration used to build the cache key and, on a cache miss, the new client
+     * @return the hive client, with its user count already incremented; callers must close() it
+     * @throws MetaException if thrown while creating the client
+     * @throws IOException if thrown while building the key or creating the client
+     * @throws LoginException if thrown while building the key or creating the client
+     */
+    public HiveMetaStoreClient get(final HiveConf hiveConf) throws MetaException, IOException, LoginException {
+        final HiveClientCacheKey cacheKey = HiveClientCacheKey.fromHiveConf(hiveConf, getThreadId());
+        CacheableHiveMetaStoreClient hiveMetaStoreClient = null;
+        // the hmsc is not shared across threads. So the only way it could get closed while we are doing healthcheck
+        // is if removalListener closes it. The synchronization takes care that removalListener won't do it
+        synchronized (CACHE_TEARDOWN_LOCK) {
+            hiveMetaStoreClient = getOrCreate(cacheKey);
+            hiveMetaStoreClient.acquire();
+        }
+        if (!hiveMetaStoreClient.isOpen()) {
+            synchronized (CACHE_TEARDOWN_LOCK) {
+                hiveCache.invalidate(cacheKey);
+                hiveMetaStoreClient.close();
+                hiveMetaStoreClient = getOrCreate(cacheKey);
+                hiveMetaStoreClient.acquire();
+            }
+        }
+        return hiveMetaStoreClient;
+    }
+
+    /**
+     * Return from cache if exists else create/cache and return
+     * @param cacheKey key to look up; its HiveConf is used to construct the client on a miss
+     * @return the cached or newly created client
+     * @throws IOException if it caused the load to fail, or wrapping any other unexpected cause
+     * @throws MetaException if it caused the load to fail
+     * @throws LoginException if it caused the load to fail
+     */
+    private CacheableHiveMetaStoreClient getOrCreate(final HiveClientCacheKey cacheKey) throws IOException, MetaException, LoginException {
+        try {
+            return hiveCache.get(cacheKey, new Callable<CacheableHiveMetaStoreClient>() {
+                @Override
+                public CacheableHiveMetaStoreClient call() throws MetaException {
+                    return new CacheableHiveMetaStoreClient(cacheKey.getHiveConf(), timeout);
+                }
+            });
+        } catch (ExecutionException e) {
+            Throwable t = e.getCause(); // unwrap so callers see the loader's own exception type
+            if (t instanceof IOException) {
+                throw (IOException) t;
+            } else if (t instanceof MetaException) {
+                throw (MetaException) t;
+            } else if (t instanceof LoginException) {
+                throw (LoginException) t;
+            } else {
+                throw new IOException("Error creating hiveMetaStoreClient", t);
+            }
+        }
+    }
+
+    /**
+     * A class to wrap HiveConf and expose equality based only on UserGroupInformation, the metaStoreURIs and the
+     * thread id. This becomes the key for the cache and this way the same HiveMetaStoreClient would be returned if
+     * UserGroupInformation, metaStoreURIs and thread id are same. This function can evolve to express
+     * the cases when HiveConf is different but the same hiveMetaStoreClient can be used
+     */
+    public static class HiveClientCacheKey {
+        final private String metaStoreURIs;
+        final private UserGroupInformation ugi;
+        final private HiveConf hiveConf; // carried for client construction; not part of equals()/hashCode()
+        final private int threadId;
+
+        private HiveClientCacheKey(HiveConf hiveConf, final int threadId) throws IOException, LoginException {
+            this.metaStoreURIs = hiveConf.getVar(HiveConf.ConfVars.METASTOREURIS);
+            ugi = ShimLoader.getHadoopShims().getUGIForConf(hiveConf);
+            this.hiveConf = hiveConf;
+            this.threadId = threadId;
+        }
+
+        public static HiveClientCacheKey fromHiveConf(HiveConf hiveConf, final int threadId) throws IOException, LoginException {
+            return new HiveClientCacheKey(hiveConf, threadId);
+        }
+
+        public HiveConf getHiveConf() {
+            return hiveConf;
+        }
+
+        @Override
+        public boolean equals(Object o) {
+            if (this == o) return true;
+            if (o == null || getClass() != o.getClass()) return false;
+            HiveClientCacheKey that = (HiveClientCacheKey) o;
+            return new EqualsBuilder().
+                append(this.metaStoreURIs,
+                    that.metaStoreURIs).
+                append(this.ugi, that.ugi).
+                append(this.threadId, that.threadId).isEquals();
+        }
+
+        @Override
+        public int hashCode() {
+            return new HashCodeBuilder().
+                append(metaStoreURIs).
+                append(ugi).
+                append(threadId).toHashCode();
+        }
+    }
+
+    /**
+     * Add # of current users on HiveMetaStoreClient, so that the client can be cleaned when no one is using it.
+     */
+    public static class CacheableHiveMetaStoreClient extends HiveMetaStoreClient {
+        private AtomicInteger users = new AtomicInteger(0);
+        private volatile boolean expiredFromCache = false;
+        private volatile boolean isClosed = false; // volatile: isClosed() reads it without holding the tearDown() monitor
+        private final long expiryTime;
+        private static final int EXPIRY_TIME_EXTENSION_IN_MILLIS = 60 * 1000;
+
+        public CacheableHiveMetaStoreClient(final HiveConf conf, final int timeout) throws MetaException {
+            super(conf);
+            // Extend the expiry time with some extra time on top of guava expiry time to make sure
+            // that items closed() are for sure expired and would never be returned by guava.
+            this.expiryTime = System.currentTimeMillis() + timeout * 1000L + EXPIRY_TIME_EXTENSION_IN_MILLIS; // 1000L: multiply as long so large timeouts cannot overflow int
+        }
+
+        private void acquire() {
+            users.incrementAndGet();
+        }
+
+        private void release() {
+            users.decrementAndGet();
+        }
+
+        public void setExpiredFromCache() {
+            expiredFromCache = true;
+        }
+
+        public boolean isClosed() {
+            return isClosed;
+        }
+
+        /**
+         * Make a call to hive meta store and see if the client is still usable. Some calls where the user provides
+         * invalid data renders the client unusable for future use (example: create a table with very long table name)
+         * @return true if the probe call returned or threw NoSuchObjectException; false on MetaException or TException
+         */
+        protected boolean isOpen() {
+            try {
+                // Look for an unlikely database name and see if either MetaException or TException is thrown
+                this.getDatabase("NonExistentDatabaseUsedForHealthCheck");
+            } catch (NoSuchObjectException e) {
+                return true; // It is okay if the database doesn't exist
+            } catch (MetaException e) {
+                return false;
+            } catch (TException e) {
+                return false;
+            }
+            return true;
+        }
+
+        /**
+         * Decrement the user count and piggyback this to set expiry flag as well, then teardown(), if conditions are met.
+         * This *MUST* be called by anyone who uses this client.
+         */
+        @Override
+        public void close() {
+            release();
+            if (System.currentTimeMillis() >= expiryTime)
+                setExpiredFromCache();
+            tearDownIfUnused();
+        }
+
+        /**
+         * Tear down only if
+         *  1. There are no active users
+         *  2. It has expired from the cache
+         */
+        private void tearDownIfUnused() {
+            if (users.get() == 0 && expiredFromCache) {
+                this.tearDown();
+            }
+        }
+
+        /**
+         * Close if not closed already
+         */
+        protected synchronized void tearDown() {
+            try {
+                if (!isClosed) {
+                    super.close();
+                }
+                isClosed = true;
+            } catch (Exception e) {
+                LOG.warn("Error closing hive metastore client. Ignored.", e);
+            }
+        }
+
+        /**
+         * Last effort to clean up, may not even get called.
+         * @throws Throwable
+         */
+        @Override
+        protected void finalize() throws Throwable {
+            try {
+                this.tearDown();
+            } finally {
+                super.finalize();
+            }
+        }
+    }
+}
diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/DataType.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/DataType.java
new file mode 100644
index 0000000..9b36121
--- /dev/null
+++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/DataType.java
@@ -0,0 +1,207 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hive.hcatalog.data;
+
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
+import java.util.Map.Entry;
+
+/** Byte type codes for record values, plus comparison helpers keyed on those codes. */
+public abstract class DataType {
+
+    public static final byte NULL = 1;
+    public static final byte BOOLEAN = 5;
+    public static final byte BYTE = 6;
+    public static final byte INTEGER = 10;
+    public static final byte SHORT = 11;
+    public static final byte LONG = 15;
+    public static final byte FLOAT = 20;
+    public static final byte DOUBLE = 25;
+    public static final byte STRING = 55;
+    public static final byte BINARY = 60;
+
+    public static final byte MAP = 100;
+    public static final byte STRUCT = 110;
+    public static final byte LIST = 120;
+    public static final byte ERROR = -1;
+
+    /**
+     * Determine the datatype of an object.
+     * @param o Object to test.
+     * @return byte code of the type, or ERROR if we don't know.
+     */
+    public static byte findType(Object o) {
+        if (o == null) {
+            return NULL;
+        }
+
+        Class<?> clazz = o.getClass();
+
+        // Try to put the most common first
+        if (clazz == String.class) {
+            return STRING;
+        } else if (clazz == Integer.class) {
+            return INTEGER;
+        } else if (clazz == Long.class) {
+            return LONG;
+        } else if (clazz == Float.class) {
+            return FLOAT;
+        } else if (clazz == Double.class) {
+            return DOUBLE;
+        } else if (clazz == Boolean.class) {
+            return BOOLEAN;
+        } else if (clazz == Byte.class) {
+            return BYTE;
+        } else if (clazz == Short.class) {
+            return SHORT;
+        } else if (o instanceof List) {
+            return LIST;
+        } else if (o instanceof Map) {
+            return MAP;
+        } else if (o instanceof byte[]) {
+            return BINARY;
+        } else {
+            return ERROR;
+        }
+    }
+    /** Compares two values using the type code findType assigns to each of them. */
+    public static int compare(Object o1, Object o2) {
+
+        return compare(o1, o2, findType(o1), findType(o2));
+    }
+    /** Orders by type code when the codes differ; otherwise by value. Throws RuntimeException for codes with no case (STRUCT, ERROR). */
+    public static int compare(Object o1, Object o2, byte dt1, byte dt2) {
+        if (dt1 == dt2) {
+            switch (dt1) {
+            case NULL:
+                return 0;
+
+            case BOOLEAN:
+                return ((Boolean) o1).compareTo((Boolean) o2);
+
+            case BYTE:
+                return ((Byte) o1).compareTo((Byte) o2);
+
+            case INTEGER:
+                return ((Integer) o1).compareTo((Integer) o2);
+
+            case LONG:
+                return ((Long) o1).compareTo((Long) o2);
+
+            case FLOAT:
+                return ((Float) o1).compareTo((Float) o2);
+
+            case DOUBLE:
+                return ((Double) o1).compareTo((Double) o2);
+
+            case STRING:
+                return ((String) o1).compareTo((String) o2);
+
+            case SHORT:
+                return ((Short) o1).compareTo((Short) o2);
+
+            case BINARY:
+                return compareByteArray((byte[]) o1, (byte[]) o2);
+
+            case LIST:
+                List<Object> l1 = (List<Object>) o1;
+                List<Object> l2 = (List<Object>) o2;
+                int len = l1.size();
+                if (len != l2.size()) {
+                    return len - l2.size(); // shorter list sorts first
+                } else {
+                    for (int i = 0; i < len; i++) {
+                        int cmpVal = compare(l1.get(i), l2.get(i));
+                        if (cmpVal != 0) {
+                            return cmpVal;
+                        }
+                    }
+                    return 0;
+                }
+
+            case MAP: {
+                Map<Object, Object> m1 = (Map<Object, Object>) o1;
+                Map<Object, Object> m2 = (Map<Object, Object>) o2;
+                int sz1 = m1.size();
+                int sz2 = m2.size();
+                if (sz1 < sz2) {
+                    return -1;
+                } else if (sz1 > sz2) {
+                    return 1;
+                } else {
+                    // This is bad, but we have to sort the keys of the maps in order
+                    // to be commutative.
+                    TreeMap<Object, Object> tm1 = new TreeMap<Object, Object>(m1);
+                    TreeMap<Object, Object> tm2 = new TreeMap<Object, Object>(m2);
+                    Iterator<Entry<Object, Object>> i1 = tm1.entrySet().iterator();
+                    Iterator<Entry<Object, Object>> i2 = tm2.entrySet().iterator();
+                    while (i1.hasNext()) {
+                        Map.Entry<Object, Object> entry1 = i1.next();
+                        Map.Entry<Object, Object> entry2 = i2.next();
+                        int c = compare(entry1.getKey(), entry2.getKey()); // keys first; values only break ties between equal keys
+                        if (c != 0) {
+                            return c;
+                        } else {
+                            c = compare(entry1.getValue(), entry2.getValue());
+                            if (c != 0) {
+                                return c;
+                            }
+                        }
+                    }
+                    return 0;
+                }
+            }
+
+            default:
+                throw new RuntimeException("Unkown type " + dt1 +
+                    " in compare");
+            }
+        } else {
+            return dt1 < dt2 ? -1 : 1;
+        }
+    }
+    /** Compares byte arrays element by element as signed bytes; an array that is a strict prefix of the other sorts first. */
+    private static int compareByteArray(byte[] o1, byte[] o2) {
+
+        for (int i = 0; i < o1.length; i++) {
+            if (i == o2.length) {
+                return 1;
+            }
+            if (o1[i] == o2[i]) {
+                continue;
+            }
+            if (o1[i] > o2[i]) { // must test against o2; comparing o1[i] with itself is never true
+                return 1;
+            } else {
+                return -1;
+            }
+        }
+
+        //bytes in o1 are same as o2
+        //in case o2 was longer
+        if (o2.length > o1.length) {
+            return -1;
+        }
+        return 0; //equals
+    }
+
+}
diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/DefaultHCatRecord.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/DefaultHCatRecord.java
new file mode 100644
index 0000000..c22a2b4
--- /dev/null
+++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/DefaultHCatRecord.java
@@ -0,0 +1,131 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */ + +package org.apache.hive.hcatalog.data; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.hive.hcatalog.common.HCatException; +import org.apache.hive.hcatalog.data.schema.HCatSchema; + +public class DefaultHCatRecord extends HCatRecord { + + private List contents; + + public DefaultHCatRecord() { + contents = new ArrayList(); + } + + public DefaultHCatRecord(int size) { + contents = new ArrayList(size); + for (int i = 0; i < size; i++) { + contents.add(null); + } + } + + @Override + public void remove(int idx) throws HCatException { + contents.remove(idx); + } + + public DefaultHCatRecord(List list) { + contents = list; + } + + @Override + public Object get(int fieldNum) { + return contents.get(fieldNum); + } + + @Override + public List getAll() { + return contents; + } + + @Override + public void set(int fieldNum, Object val) { + contents.set(fieldNum, val); + } + + @Override + public int size() { + return contents.size(); + } + + @Override + public void readFields(DataInput in) throws IOException { + + contents.clear(); + int len = in.readInt(); + for (int i = 0; i < len; i++) { + contents.add(ReaderWriter.readDatum(in)); + } + } + + @Override + public void write(DataOutput out) throws IOException { + int sz = size(); + out.writeInt(sz); + for (int i = 0; i < sz; i++) { + ReaderWriter.writeDatum(out, contents.get(i)); + } + + } + + @Override + public int hashCode() { + int hash = 1; + for (Object o : contents) { + if (o != null) { + hash = 31 * hash + o.hashCode(); + } + } + return hash; + } + + @Override + public String toString() { + + StringBuilder sb = new StringBuilder(); + for (Object o : contents) { + sb.append(o + "\t"); + } + return sb.toString(); + } + + @Override + public Object get(String fieldName, HCatSchema recordSchema) throws HCatException { + return get(recordSchema.getPosition(fieldName)); + } + + @Override + public void 
set(String fieldName, HCatSchema recordSchema, Object value) throws HCatException { + set(recordSchema.getPosition(fieldName), value); + } + + @Override + public void copy(HCatRecord r) throws HCatException { + this.contents = r.getAll(); + } + +} diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/HCatRecord.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/HCatRecord.java new file mode 100644 index 0000000..14ed244 --- /dev/null +++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/HCatRecord.java @@ -0,0 +1,149 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.data; + +import java.util.List; +import java.util.Map; + +import org.apache.hive.hcatalog.common.HCatException; +import org.apache.hive.hcatalog.data.schema.HCatSchema; + +/** + * Abstract class exposing get and set semantics for basic record usage. + * Note : + * HCatRecord is designed only to be used as in-memory representation only. + * Don't use it to store data on the physical device. 
+ */ +public abstract class HCatRecord implements HCatRecordable { + + public abstract Object get(String fieldName, HCatSchema recordSchema) throws HCatException; + + public abstract void set(String fieldName, HCatSchema recordSchema, Object value) throws HCatException; + + public abstract void remove(int idx) throws HCatException; + + public abstract void copy(HCatRecord r) throws HCatException; + + protected Object get(String fieldName, HCatSchema recordSchema, Class clazz) throws HCatException { + // TODO : if needed, verify that recordschema entry for fieldname matches appropriate type. + return get(fieldName, recordSchema); + } + + public Boolean getBoolean(String fieldName, HCatSchema recordSchema) throws HCatException { + return (Boolean) get(fieldName, recordSchema, Boolean.class); + } + + public void setBoolean(String fieldName, HCatSchema recordSchema, Boolean value) throws HCatException { + set(fieldName, recordSchema, value); + } + + public byte[] getByteArray(String fieldName, HCatSchema recordSchema) throws HCatException { + return (byte[]) get(fieldName, recordSchema, byte[].class); + } + + public void setByteArray(String fieldName, HCatSchema recordSchema, byte[] value) throws HCatException { + set(fieldName, recordSchema, value); + } + + public Byte getByte(String fieldName, HCatSchema recordSchema) throws HCatException { + //TINYINT + return (Byte) get(fieldName, recordSchema, Byte.class); + } + + public void setByte(String fieldName, HCatSchema recordSchema, Byte value) throws HCatException { + set(fieldName, recordSchema, value); + } + + public Short getShort(String fieldName, HCatSchema recordSchema) throws HCatException { + // SMALLINT + return (Short) get(fieldName, recordSchema, Short.class); + } + + public void setShort(String fieldName, HCatSchema recordSchema, Short value) throws HCatException { + set(fieldName, recordSchema, value); + } + + public Integer getInteger(String fieldName, HCatSchema recordSchema) throws HCatException { + 
return (Integer) get(fieldName, recordSchema, Integer.class); + } + + public void setInteger(String fieldName, HCatSchema recordSchema, Integer value) throws HCatException { + set(fieldName, recordSchema, value); + } + + public Long getLong(String fieldName, HCatSchema recordSchema) throws HCatException { + // BIGINT + return (Long) get(fieldName, recordSchema, Long.class); + } + + public void setLong(String fieldName, HCatSchema recordSchema, Long value) throws HCatException { + set(fieldName, recordSchema, value); + } + + public Float getFloat(String fieldName, HCatSchema recordSchema) throws HCatException { + return (Float) get(fieldName, recordSchema, Float.class); + } + + public void setFloat(String fieldName, HCatSchema recordSchema, Float value) throws HCatException { + set(fieldName, recordSchema, value); + } + + public Double getDouble(String fieldName, HCatSchema recordSchema) throws HCatException { + return (Double) get(fieldName, recordSchema, Double.class); + } + + public void setDouble(String fieldName, HCatSchema recordSchema, Double value) throws HCatException { + set(fieldName, recordSchema, value); + } + + public String getString(String fieldName, HCatSchema recordSchema) throws HCatException { + return (String) get(fieldName, recordSchema, String.class); + } + + public void setString(String fieldName, HCatSchema recordSchema, String value) throws HCatException { + set(fieldName, recordSchema, value); + } + + @SuppressWarnings("unchecked") + public List getStruct(String fieldName, HCatSchema recordSchema) throws HCatException { + return (List) get(fieldName, recordSchema, List.class); + } + + public void setStruct(String fieldName, HCatSchema recordSchema, List value) throws HCatException { + set(fieldName, recordSchema, value); + } + + public List getList(String fieldName, HCatSchema recordSchema) throws HCatException { + return (List) get(fieldName, recordSchema, List.class); + } + + public void setList(String fieldName, HCatSchema recordSchema, 
List value) throws HCatException { + set(fieldName, recordSchema, value); + } + + public Map getMap(String fieldName, HCatSchema recordSchema) throws HCatException { + return (Map) get(fieldName, recordSchema, Map.class); + } + + public void setMap(String fieldName, HCatSchema recordSchema, Map value) throws HCatException { + set(fieldName, recordSchema, value); + } + +} diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/HCatRecordObjectInspector.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/HCatRecordObjectInspector.java new file mode 100644 index 0000000..030f655 --- /dev/null +++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/HCatRecordObjectInspector.java @@ -0,0 +1,53 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.hive.hcatalog.data; + +import java.util.List; + +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; + +public class HCatRecordObjectInspector extends StandardStructObjectInspector { + + protected HCatRecordObjectInspector(List structFieldNames, + List structFieldObjectInspectors) { + super(structFieldNames, structFieldObjectInspectors); + } + + @Override + public Object getStructFieldData(Object data, StructField fieldRef) { + if (data == null) { + return new IllegalArgumentException("Data passed in to get field from was null!"); + } + + int fieldID = ((MyField) fieldRef).getFieldID(); + if (!(fieldID >= 0 && fieldID < fields.size())) { + throw new IllegalArgumentException("Invalid field index [" + fieldID + "]"); + } + + return ((HCatRecord) data).get(fieldID); + } + + @Override + public List getStructFieldsDataAsList(Object o) { + return ((HCatRecord) o).getAll(); + } + +} diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/HCatRecordObjectInspectorFactory.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/HCatRecordObjectInspectorFactory.java new file mode 100644 index 0000000..9fbc9c0 --- /dev/null +++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/HCatRecordObjectInspectorFactory.java @@ -0,0 +1,132 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.data; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; + +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * ObjectInspectorFactory for HCatRecordObjectInspectors (and associated helper inspectors) + */ +public class HCatRecordObjectInspectorFactory { + + private final static Logger LOG = LoggerFactory.getLogger(HCatRecordObjectInspectorFactory.class); + + static HashMap cachedHCatRecordObjectInspectors = + new HashMap(); + static HashMap cachedObjectInspectors = + new HashMap(); + + /** + * Returns HCatRecordObjectInspector given a StructTypeInfo type definition for the record to look into + * @param typeInfo Type definition for the record to look into + * @return appropriate HCatRecordObjectInspector + * @throws SerDeException + */ + public static HCatRecordObjectInspector getHCatRecordObjectInspector( + StructTypeInfo typeInfo) throws SerDeException { + 
HCatRecordObjectInspector oi = cachedHCatRecordObjectInspectors.get(typeInfo); + if (oi == null) { + + LOG.debug("Got asked for OI for {} [{} ]", typeInfo.getCategory(), typeInfo.getTypeName()); + switch (typeInfo.getCategory()) { + case STRUCT: + StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo; + List fieldNames = structTypeInfo.getAllStructFieldNames(); + List fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos(); + List fieldObjectInspectors = new ArrayList(fieldTypeInfos.size()); + for (int i = 0; i < fieldTypeInfos.size(); i++) { + fieldObjectInspectors.add(getStandardObjectInspectorFromTypeInfo(fieldTypeInfos.get(i))); + } + oi = new HCatRecordObjectInspector(fieldNames, fieldObjectInspectors); + + break; + default: + // Hmm.. not good, + // the only type expected here is STRUCT, which maps to HCatRecord + // - anything else is an error. Return null as the inspector. + throw new SerDeException("TypeInfo [" + typeInfo.getTypeName() + + "] was not of struct type - HCatRecord expected struct type, got [" + + typeInfo.getCategory().toString() + "]"); + } + cachedHCatRecordObjectInspectors.put(typeInfo, oi); + } + return oi; + } + + public static ObjectInspector getStandardObjectInspectorFromTypeInfo(TypeInfo typeInfo) { + + + ObjectInspector oi = cachedObjectInspectors.get(typeInfo); + if (oi == null) { + + LOG.debug("Got asked for OI for {}, [{}]", typeInfo.getCategory(), typeInfo.getTypeName()); + switch (typeInfo.getCategory()) { + case PRIMITIVE: + oi = PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector( + ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory()); + break; + case STRUCT: + StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo; + List fieldNames = structTypeInfo.getAllStructFieldNames(); + List fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos(); + List fieldObjectInspectors = + new ArrayList(fieldTypeInfos.size()); + for (int i = 0; i < fieldTypeInfos.size(); i++) { + 
fieldObjectInspectors.add(getStandardObjectInspectorFromTypeInfo(fieldTypeInfos.get(i))); + } + oi = ObjectInspectorFactory.getStandardStructObjectInspector( + fieldNames, fieldObjectInspectors + ); + break; + case LIST: + ObjectInspector elementObjectInspector = getStandardObjectInspectorFromTypeInfo( + ((ListTypeInfo) typeInfo).getListElementTypeInfo()); + oi = ObjectInspectorFactory.getStandardListObjectInspector(elementObjectInspector); + break; + case MAP: + ObjectInspector keyObjectInspector = getStandardObjectInspectorFromTypeInfo( + ((MapTypeInfo) typeInfo).getMapKeyTypeInfo()); + ObjectInspector valueObjectInspector = getStandardObjectInspectorFromTypeInfo( + ((MapTypeInfo) typeInfo).getMapValueTypeInfo()); + oi = ObjectInspectorFactory.getStandardMapObjectInspector(keyObjectInspector, valueObjectInspector); + break; + default: + oi = null; + } + cachedObjectInspectors.put(typeInfo, oi); + } + return oi; + } + + +} diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/HCatRecordSerDe.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/HCatRecordSerDe.java new file mode 100644 index 0000000..3153847 --- /dev/null +++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/HCatRecordSerDe.java @@ -0,0 +1,318 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.data; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import java.util.TreeMap; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.SerDe; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.SerDeStats; +import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.io.Writable; +import org.apache.hive.hcatalog.common.HCatConstants; +import org.apache.hive.hcatalog.common.HCatContext; +import org.apache.hive.hcatalog.data.schema.HCatSchema; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * SerDe class for serializing to and from HCatRecord + */ +public class HCatRecordSerDe implements SerDe { + + private static final Logger LOG = LoggerFactory.getLogger(HCatRecordSerDe.class); + + public HCatRecordSerDe() throws SerDeException { + } + + private List columnNames; + private List columnTypes; + private StructTypeInfo rowTypeInfo; + + private HCatRecordObjectInspector 
cachedObjectInspector; + + @Override + public void initialize(Configuration conf, Properties tbl) + throws SerDeException { + + LOG.debug("Initializing HCatRecordSerDe"); + LOG.debug("props to serde: {}", tbl.entrySet()); + + // Get column names and types + String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS); + String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES); + + // all table column names + if (columnNameProperty.length() == 0) { + columnNames = new ArrayList(); + } else { + columnNames = Arrays.asList(columnNameProperty.split(",")); + } + + // all column types + if (columnTypeProperty.length() == 0) { + columnTypes = new ArrayList(); + } else { + columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty); + } + + + LOG.debug("columns: {} {}", columnNameProperty, columnNames); + LOG.debug("types: {} {}", columnTypeProperty, columnTypes); + assert (columnNames.size() == columnTypes.size()); + + rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes); + cachedObjectInspector = HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector(rowTypeInfo); + } + + public void initialize(HCatSchema hsch) throws SerDeException { + + LOG.debug("Initializing HCatRecordSerDe through HCatSchema {}.", hsch); + + rowTypeInfo = (StructTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(hsch.getSchemaAsTypeString()); + cachedObjectInspector = HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector(rowTypeInfo); + + } + + + /** + * The purpose of a deserialize method is to turn a data blob + * which is a writable representation of the data into an + * object that can then be parsed using the appropriate + * ObjectInspector. In this case, since HCatRecord is directly + * already the Writable object, there's no extra work to be done + * here. Most of the logic resides in the ObjectInspector to be + * able to return values from within the HCatRecord to hive when + * it wants it. 
+ */ + @Override + public Object deserialize(Writable data) throws SerDeException { + if (!(data instanceof HCatRecord)) { + throw new SerDeException(getClass().getName() + ": expects HCatRecord!"); + } + + return (HCatRecord) data; + } + + /** + * The purpose of the serialize method is to turn an object-representation + * with a provided ObjectInspector into a Writable format, which + * the underlying layer can then use to write out. + * + * In this case, it means that Hive will call this method to convert + * an object with appropriate objectinspectors that it knows about, + * to write out a HCatRecord. + */ + @Override + public Writable serialize(Object obj, ObjectInspector objInspector) + throws SerDeException { + if (objInspector.getCategory() != Category.STRUCT) { + throw new SerDeException(getClass().toString() + + " can only serialize struct types, but we got: " + + objInspector.getTypeName()); + } + return new DefaultHCatRecord((List) serializeStruct(obj, (StructObjectInspector) objInspector)); + } + + + /** + * Return serialized HCatRecord from an underlying + * object-representation, and readable by an ObjectInspector + * @param obj : Underlying object-representation + * @param soi : StructObjectInspector + * @return HCatRecord + */ + private static List serializeStruct(Object obj, StructObjectInspector soi) + throws SerDeException { + + List fields = soi.getAllStructFieldRefs(); + List list = soi.getStructFieldsDataAsList(obj); + + if (list == null) { + return null; + } + + List l = new ArrayList(fields.size()); + + if (fields != null) { + for (int i = 0; i < fields.size(); i++) { + + // Get the field objectInspector and the field object. + ObjectInspector foi = fields.get(i).getFieldObjectInspector(); + Object f = list.get(i); + Object res = serializeField(f, foi); + l.add(i, res); + } + } + return l; + } + + /** + * Return underlying Java Object from an object-representation + * that is readable by a provided ObjectInspector. 
+ */ + public static Object serializeField(Object field, ObjectInspector fieldObjectInspector) + throws SerDeException { + + Object res; + if (fieldObjectInspector.getCategory() == Category.PRIMITIVE) { + res = serializePrimitiveField(field, fieldObjectInspector); + } else if (fieldObjectInspector.getCategory() == Category.STRUCT) { + res = serializeStruct(field, (StructObjectInspector) fieldObjectInspector); + } else if (fieldObjectInspector.getCategory() == Category.LIST) { + res = serializeList(field, (ListObjectInspector) fieldObjectInspector); + } else if (fieldObjectInspector.getCategory() == Category.MAP) { + res = serializeMap(field, (MapObjectInspector) fieldObjectInspector); + } else { + throw new SerDeException(HCatRecordSerDe.class.toString() + + " does not know what to do with fields of unknown category: " + + fieldObjectInspector.getCategory() + " , type: " + fieldObjectInspector.getTypeName()); + } + return res; + } + + /** + * Helper method to return underlying Java Map from + * an object-representation that is readable by a provided + * MapObjectInspector + */ + private static Map serializeMap(Object f, MapObjectInspector moi) throws SerDeException { + ObjectInspector koi = moi.getMapKeyObjectInspector(); + ObjectInspector voi = moi.getMapValueObjectInspector(); + Map m = new TreeMap(); + + Map readMap = moi.getMap(f); + if (readMap == null) { + return null; + } else { + for (Map.Entry entry : readMap.entrySet()) { + m.put(serializeField(entry.getKey(), koi), serializeField(entry.getValue(), voi)); + } + } + return m; + } + + private static List serializeList(Object f, ListObjectInspector loi) throws SerDeException { + List l = loi.getList(f); + if (l == null) { + return null; + } + + ObjectInspector eloi = loi.getListElementObjectInspector(); + if (eloi.getCategory() == Category.PRIMITIVE) { + List list = new ArrayList(l.size()); + for (int i = 0; i < l.size(); i++) { + list.add(((PrimitiveObjectInspector) eloi).getPrimitiveJavaObject(l.get(i))); 
+ } + return list; + } else if (eloi.getCategory() == Category.STRUCT) { + List> list = new ArrayList>(l.size()); + for (int i = 0; i < l.size(); i++) { + list.add(serializeStruct(l.get(i), (StructObjectInspector) eloi)); + } + return list; + } else if (eloi.getCategory() == Category.LIST) { + List> list = new ArrayList>(l.size()); + for (int i = 0; i < l.size(); i++) { + list.add(serializeList(l.get(i), (ListObjectInspector) eloi)); + } + return list; + } else if (eloi.getCategory() == Category.MAP) { + List> list = new ArrayList>(l.size()); + for (int i = 0; i < l.size(); i++) { + list.add(serializeMap(l.get(i), (MapObjectInspector) eloi)); + } + return list; + } else { + throw new SerDeException(HCatRecordSerDe.class.toString() + + " does not know what to do with fields of unknown category: " + + eloi.getCategory() + " , type: " + eloi.getTypeName()); + } + } + + private static Object serializePrimitiveField(Object field, + ObjectInspector fieldObjectInspector) { + + Object f = ((PrimitiveObjectInspector) fieldObjectInspector).getPrimitiveJavaObject(field); + if (f != null && HCatContext.INSTANCE.getConf().isPresent()) { + Configuration conf = HCatContext.INSTANCE.getConf().get(); + + if (f instanceof Boolean && + conf.getBoolean( + HCatConstants.HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER, + HCatConstants.HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER_DEFAULT)) { + return ((Boolean) f) ? 1 : 0; + } else if (f instanceof Short && + conf.getBoolean( + HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION, + HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION_DEFAULT)) { + return new Integer((Short) f); + } else if (f instanceof Byte && + conf.getBoolean( + HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION, + HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION_DEFAULT)) { + return new Integer((Byte) f); + } + } + + return f; + } + + /** + * Return an object inspector that can read through the object + * that we return from deserialize(). 
To wit, that means we need + * to return an ObjectInspector that can read HCatRecord, given + * the type info for it during initialize(). This also means + * that this method cannot and should not be called before initialize() + */ + @Override + public ObjectInspector getObjectInspector() throws SerDeException { + return (ObjectInspector) cachedObjectInspector; + } + + @Override + public Class getSerializedClass() { + return HCatRecord.class; + } + + @Override + public SerDeStats getSerDeStats() { + // no support for statistics yet + return null; + } + + +} diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/HCatRecordable.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/HCatRecordable.java new file mode 100644 index 0000000..4e9934e --- /dev/null +++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/HCatRecordable.java @@ -0,0 +1,56 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.hive.hcatalog.data; + +import java.util.List; + +import org.apache.hadoop.io.Writable; + +/** + * Interface that determines whether we can implement a HCatRecord on top of it + */ +public interface HCatRecordable extends Writable { + + /** + * Gets the field at the specified index. + * @param fieldNum the field number + * @return the object at the specified index + */ + Object get(int fieldNum); + + /** + * Gets all the fields of the hcat record. + * @return the list of fields + */ + List getAll(); + + /** + * Sets the field at the specified index. + * @param fieldNum the field number + * @param value the value to set + */ + void set(int fieldNum, Object value); + + /** + * Gets the size of the hcat record. + * @return the size + */ + int size(); + +} diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/JsonSerDe.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/JsonSerDe.java new file mode 100644 index 0000000..b68f308 --- /dev/null +++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/JsonSerDe.java @@ -0,0 +1,575 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.hive.hcatalog.data; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.SerDe; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.SerDeStats; +import org.apache.hadoop.hive.serde2.SerDeUtils; +import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector; +import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.Writable; +import org.apache.hive.hcatalog.common.HCatException; +import org.apache.hive.hcatalog.data.schema.HCatFieldSchema; +import org.apache.hive.hcatalog.data.schema.HCatFieldSchema.Type; +import org.apache.hive.hcatalog.data.schema.HCatSchema; +import org.apache.hive.hcatalog.data.schema.HCatSchemaUtils; +import org.codehaus.jackson.JsonFactory; +import org.codehaus.jackson.JsonParseException; +import org.codehaus.jackson.JsonParser; +import org.codehaus.jackson.JsonToken; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class JsonSerDe implements SerDe { + + private static final Logger LOG = LoggerFactory.getLogger(JsonSerDe.class); + private List columnNames; + private List columnTypes; + + private StructTypeInfo rowTypeInfo; + private HCatSchema schema; + + private JsonFactory jsonFactory = null; + + private HCatRecordObjectInspector cachedObjectInspector; + + @Override + public void initialize(Configuration conf, Properties tbl) + throws SerDeException { + + + LOG.debug("Initializing JsonSerDe"); + LOG.debug("props to serde: {}", tbl.entrySet()); + + + // Get column names and types + String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS); + String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES); + + // all table column names + if (columnNameProperty.length() == 0) { + columnNames = new ArrayList(); + } else { + columnNames = Arrays.asList(columnNameProperty.split(",")); + } + + // all column types + if (columnTypeProperty.length() == 0) { + columnTypes = new ArrayList(); + } else { + columnTypes = 
TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty); + } + + LOG.debug("columns: {}, {}", columnNameProperty, columnNames); + LOG.debug("types: {}, {} ", columnTypeProperty, columnTypes); + + assert (columnNames.size() == columnTypes.size()); + + rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes); + + cachedObjectInspector = HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector(rowTypeInfo); + try { + schema = HCatSchemaUtils.getHCatSchema(rowTypeInfo).get(0).getStructSubSchema(); + LOG.debug("schema : {}", schema); + LOG.debug("fields : {}", schema.getFieldNames()); + } catch (HCatException e) { + throw new SerDeException(e); + } + + jsonFactory = new JsonFactory(); + } + + /** + * Takes JSON string in Text form, and has to return an object representation above + * it that's readable by the corresponding object inspector. + * + * For this implementation, since we're using the jackson parser, we can construct + * our own object implementation, and we use HCatRecord for it + */ + @Override + public Object deserialize(Writable blob) throws SerDeException { + + Text t = (Text) blob; + JsonParser p; + List r = new ArrayList(Collections.nCopies(columnNames.size(), null)); + try { + p = jsonFactory.createJsonParser(new ByteArrayInputStream((t.getBytes()))); + if (p.nextToken() != JsonToken.START_OBJECT) { + throw new IOException("Start token not found where expected"); + } + JsonToken token; + while (((token = p.nextToken()) != JsonToken.END_OBJECT) && (token != null)) { + // iterate through each token, and create appropriate object here. 
+ populateRecord(r, token, p, schema); + } + } catch (JsonParseException e) { + LOG.warn("Error [{}] parsing json text [{}].", e, t); + LOG.debug(null, e); + throw new SerDeException(e); + } catch (IOException e) { + LOG.warn("Error [{}] parsing json text [{}].", e, t); + LOG.debug(null, e); + throw new SerDeException(e); + } + + return new DefaultHCatRecord(r); + } + + private void populateRecord(List r, JsonToken token, JsonParser p, HCatSchema s) throws IOException { + if (token != JsonToken.FIELD_NAME) { + throw new IOException("Field name expected"); + } + String fieldName = p.getText(); + int fpos; + try { + fpos = s.getPosition(fieldName); + } catch (NullPointerException npe) { + fpos = getPositionFromHiveInternalColumnName(fieldName); + LOG.debug("NPE finding position for field [{}] in schema [{}]", fieldName, s); + if (!fieldName.equalsIgnoreCase(getHiveInternalColumnName(fpos))) { + LOG.error("Hive internal column name {} and position " + + "encoding {} for the column name are at odds", fieldName, fpos); + throw npe; + } + if (fpos == -1) { + return; // unknown field, we return. + } + } + HCatFieldSchema hcatFieldSchema = s.getFields().get(fpos); + Object currField = extractCurrentField(p, null, hcatFieldSchema, false); + r.set(fpos, currField); + } + + public String getHiveInternalColumnName(int fpos) { + return HiveConf.getColumnInternalName(fpos); + } + + public int getPositionFromHiveInternalColumnName(String internalName) { +// return HiveConf.getPositionFromInternalName(fieldName); + // The above line should have been all the implementation that + // we need, but due to a bug in that impl which recognizes + // only single-digit columns, we need another impl here. 
+ Pattern internalPattern = Pattern.compile("_col([0-9]+)"); + Matcher m = internalPattern.matcher(internalName); + if (!m.matches()) { + return -1; + } else { + return Integer.parseInt(m.group(1)); + } + } + + /** + * Utility method to extract current expected field from given JsonParser + * + * To get the field, we need either a type or a hcatFieldSchema(necessary for complex types) + * It is possible that one of them can be null, and so, if so, the other is instantiated + * from the other + * + * isTokenCurrent is a boolean variable also passed in, which determines + * if the JsonParser is already at the token we expect to read next, or + * needs advancing to the next before we read. + */ + private Object extractCurrentField(JsonParser p, Type t, + HCatFieldSchema hcatFieldSchema, boolean isTokenCurrent) throws IOException, JsonParseException, + HCatException { + Object val = null; + JsonToken valueToken; + if (isTokenCurrent) { + valueToken = p.getCurrentToken(); + } else { + valueToken = p.nextToken(); + } + + if (hcatFieldSchema != null) { + t = hcatFieldSchema.getType(); + } + switch (t) { + case INT: + val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getIntValue(); + break; + case TINYINT: + val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getByteValue(); + break; + case SMALLINT: + val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getShortValue(); + break; + case BIGINT: + val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getLongValue(); + break; + case BOOLEAN: + String bval = (valueToken == JsonToken.VALUE_NULL) ? null : p.getText(); + if (bval != null) { + val = Boolean.valueOf(bval); + } else { + val = null; + } + break; + case FLOAT: + val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getFloatValue(); + break; + case DOUBLE: + val = (valueToken == JsonToken.VALUE_NULL) ? null : p.getDoubleValue(); + break; + case STRING: + val = (valueToken == JsonToken.VALUE_NULL) ? 
null : p.getText(); + break; + case BINARY: + throw new IOException("JsonSerDe does not support BINARY type"); + case ARRAY: + if (valueToken == JsonToken.VALUE_NULL) { + val = null; + break; + } + if (valueToken != JsonToken.START_ARRAY) { + throw new IOException("Start of Array expected"); + } + List arr = new ArrayList(); + while ((valueToken = p.nextToken()) != JsonToken.END_ARRAY) { + arr.add(extractCurrentField(p, null, hcatFieldSchema.getArrayElementSchema().get(0), true)); + } + val = arr; + break; + case MAP: + if (valueToken == JsonToken.VALUE_NULL) { + val = null; + break; + } + if (valueToken != JsonToken.START_OBJECT) { + throw new IOException("Start of Object expected"); + } + Map map = new LinkedHashMap(); + Type keyType = hcatFieldSchema.getMapKeyType(); + HCatFieldSchema valueSchema = hcatFieldSchema.getMapValueSchema().get(0); + while ((valueToken = p.nextToken()) != JsonToken.END_OBJECT) { + Object k = getObjectOfCorrespondingPrimitiveType(p.getCurrentName(), keyType); + Object v; + if (valueSchema.getType() == HCatFieldSchema.Type.STRUCT) { + v = extractCurrentField(p, null, valueSchema, false); + } else { + v = extractCurrentField(p, null, valueSchema, true); + } + + map.put(k, v); + } + val = map; + break; + case STRUCT: + if (valueToken == JsonToken.VALUE_NULL) { + val = null; + break; + } + if (valueToken != JsonToken.START_OBJECT) { + throw new IOException("Start of Object expected"); + } + HCatSchema subSchema = hcatFieldSchema.getStructSubSchema(); + int sz = subSchema.getFieldNames().size(); + + List struct = new ArrayList(Collections.nCopies(sz, null)); + while ((valueToken = p.nextToken()) != JsonToken.END_OBJECT) { + populateRecord(struct, valueToken, p, subSchema); + } + val = struct; + break; + } + return val; + } + + private Object getObjectOfCorrespondingPrimitiveType(String s, Type t) throws IOException { + switch (t) { + case INT: + return Integer.valueOf(s); + case TINYINT: + return Byte.valueOf(s); + case SMALLINT: + return 
Short.valueOf(s); + case BIGINT: + return Long.valueOf(s); + case BOOLEAN: + return (s.equalsIgnoreCase("true")); + case FLOAT: + return Float.valueOf(s); + case DOUBLE: + return Double.valueOf(s); + case STRING: + return s; + case BINARY: + throw new IOException("JsonSerDe does not support BINARY type"); + } + throw new IOException("Could not convert from string to map type " + t); + } + + /** + * Given an object and object inspector pair, traverse the object + * and generate a Text representation of the object. + */ + @Override + public Writable serialize(Object obj, ObjectInspector objInspector) + throws SerDeException { + StringBuilder sb = new StringBuilder(); + try { + + StructObjectInspector soi = (StructObjectInspector) objInspector; + List structFields = soi.getAllStructFieldRefs(); + assert (columnNames.size() == structFields.size()); + if (obj == null) { + sb.append("null"); + } else { + sb.append(SerDeUtils.LBRACE); + for (int i = 0; i < structFields.size(); i++) { + if (i > 0) { + sb.append(SerDeUtils.COMMA); + } + sb.append(SerDeUtils.QUOTE); + sb.append(columnNames.get(i)); + sb.append(SerDeUtils.QUOTE); + sb.append(SerDeUtils.COLON); + buildJSONString(sb, soi.getStructFieldData(obj, structFields.get(i)), + structFields.get(i).getFieldObjectInspector()); + } + sb.append(SerDeUtils.RBRACE); + } + + } catch (IOException e) { + LOG.warn("Error generating json text from object.", e); + throw new SerDeException(e); + } + return new Text(sb.toString()); + } + + // TODO : code section copied over from SerDeUtils because of non-standard json production there + // should use quotes for all field names. We should fix this there, and then remove this copy. + // See http://jackson.codehaus.org/1.7.3/javadoc/org/codehaus/jackson/JsonParser.Feature.html#ALLOW_UNQUOTED_FIELD_NAMES + // for details - trying to enable Jackson to ignore that doesn't seem to work(compilation failure + // when attempting to use that feature, so having to change the production itself. 
+ // Also, throws IOException when Binary is detected. + private static void buildJSONString(StringBuilder sb, Object o, ObjectInspector oi) throws IOException { + + switch (oi.getCategory()) { + case PRIMITIVE: { + PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; + if (o == null) { + sb.append("null"); + } else { + switch (poi.getPrimitiveCategory()) { + case BOOLEAN: { + boolean b = ((BooleanObjectInspector) poi).get(o); + sb.append(b ? "true" : "false"); + break; + } + case BYTE: { + sb.append(((ByteObjectInspector) poi).get(o)); + break; + } + case SHORT: { + sb.append(((ShortObjectInspector) poi).get(o)); + break; + } + case INT: { + sb.append(((IntObjectInspector) poi).get(o)); + break; + } + case LONG: { + sb.append(((LongObjectInspector) poi).get(o)); + break; + } + case FLOAT: { + sb.append(((FloatObjectInspector) poi).get(o)); + break; + } + case DOUBLE: { + sb.append(((DoubleObjectInspector) poi).get(o)); + break; + } + case STRING: { + sb.append('"'); + sb.append(SerDeUtils.escapeString(((StringObjectInspector) poi) + .getPrimitiveJavaObject(o))); + sb.append('"'); + break; + } + case TIMESTAMP: { + sb.append('"'); + sb.append(((TimestampObjectInspector) poi) + .getPrimitiveWritableObject(o)); + sb.append('"'); + break; + } + case BINARY: { + throw new IOException("JsonSerDe does not support BINARY type"); + } + default: + throw new RuntimeException("Unknown primitive type: " + + poi.getPrimitiveCategory()); + } + } + break; + } + case LIST: { + ListObjectInspector loi = (ListObjectInspector) oi; + ObjectInspector listElementObjectInspector = loi + .getListElementObjectInspector(); + List olist = loi.getList(o); + if (olist == null) { + sb.append("null"); + } else { + sb.append(SerDeUtils.LBRACKET); + for (int i = 0; i < olist.size(); i++) { + if (i > 0) { + sb.append(SerDeUtils.COMMA); + } + buildJSONString(sb, olist.get(i), listElementObjectInspector); + } + sb.append(SerDeUtils.RBRACKET); + } + break; + } + case MAP: { + 
MapObjectInspector moi = (MapObjectInspector) oi; + ObjectInspector mapKeyObjectInspector = moi.getMapKeyObjectInspector(); + ObjectInspector mapValueObjectInspector = moi + .getMapValueObjectInspector(); + Map omap = moi.getMap(o); + if (omap == null) { + sb.append("null"); + } else { + sb.append(SerDeUtils.LBRACE); + boolean first = true; + for (Object entry : omap.entrySet()) { + if (first) { + first = false; + } else { + sb.append(SerDeUtils.COMMA); + } + Map.Entry e = (Map.Entry) entry; + StringBuilder keyBuilder = new StringBuilder(); + buildJSONString(keyBuilder, e.getKey(), mapKeyObjectInspector); + String keyString = keyBuilder.toString().trim(); + boolean doQuoting = (!keyString.isEmpty()) && (keyString.charAt(0) != SerDeUtils.QUOTE); + if (doQuoting) { + sb.append(SerDeUtils.QUOTE); + } + sb.append(keyString); + if (doQuoting) { + sb.append(SerDeUtils.QUOTE); + } + sb.append(SerDeUtils.COLON); + buildJSONString(sb, e.getValue(), mapValueObjectInspector); + } + sb.append(SerDeUtils.RBRACE); + } + break; + } + case STRUCT: { + StructObjectInspector soi = (StructObjectInspector) oi; + List structFields = soi.getAllStructFieldRefs(); + if (o == null) { + sb.append("null"); + } else { + sb.append(SerDeUtils.LBRACE); + for (int i = 0; i < structFields.size(); i++) { + if (i > 0) { + sb.append(SerDeUtils.COMMA); + } + sb.append(SerDeUtils.QUOTE); + sb.append(structFields.get(i).getFieldName()); + sb.append(SerDeUtils.QUOTE); + sb.append(SerDeUtils.COLON); + buildJSONString(sb, soi.getStructFieldData(o, structFields.get(i)), + structFields.get(i).getFieldObjectInspector()); + } + sb.append(SerDeUtils.RBRACE); + } + break; + } + case UNION: { + UnionObjectInspector uoi = (UnionObjectInspector) oi; + if (o == null) { + sb.append("null"); + } else { + sb.append(SerDeUtils.LBRACE); + sb.append(uoi.getTag(o)); + sb.append(SerDeUtils.COLON); + buildJSONString(sb, uoi.getField(o), + uoi.getObjectInspectors().get(uoi.getTag(o))); + sb.append(SerDeUtils.RBRACE); + } + 
break; + } + default: + throw new RuntimeException("Unknown type in ObjectInspector!"); + } + } + + + /** + * Returns an object inspector for the specified schema that + * is capable of reading in the object representation of the JSON string + */ + @Override + public ObjectInspector getObjectInspector() throws SerDeException { + return cachedObjectInspector; + } + + @Override + public Class getSerializedClass() { + return Text.class; + } + + @Override + public SerDeStats getSerDeStats() { + // no support for statistics yet + return null; + } + +} diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/LazyHCatRecord.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/LazyHCatRecord.java new file mode 100644 index 0000000..edac621 --- /dev/null +++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/LazyHCatRecord.java @@ -0,0 +1,146 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hive.hcatalog.data; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hive.hcatalog.common.HCatException; +import org.apache.hive.hcatalog.data.schema.HCatSchema; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * An implementation of HCatRecord that wraps an Object returned by a SerDe + * and an ObjectInspector. This delays deserialization of unused columns. + */ +public class LazyHCatRecord extends HCatRecord { + + public static final Logger LOG = LoggerFactory.getLogger(LazyHCatRecord.class.getName()); + + private Object wrappedObject; + private StructObjectInspector soi; + + @Override + public Object get(int fieldNum) { + try { + StructField fref = soi.getAllStructFieldRefs().get(fieldNum); + return HCatRecordSerDe.serializeField( + soi.getStructFieldData(wrappedObject, fref), + fref.getFieldObjectInspector()); + } catch (SerDeException e) { + throw new IllegalStateException("SerDe Exception deserializing",e); + } + } + + @Override + public List getAll() { + List r = new ArrayList(this.size()); + for (int i = 0; i < this.size(); i++){ + r.add(i, get(i)); + } + return r; + } + + @Override + public void set(int fieldNum, Object value) { + throw new UnsupportedOperationException("not allowed to run set() on LazyHCatRecord"); + } + + @Override + public int size() { + return soi.getAllStructFieldRefs().size(); + } + + @Override + public void readFields(DataInput in) throws IOException { + throw new UnsupportedOperationException("LazyHCatRecord is intended to wrap" + + " 
an object/object inspector as a HCatRecord " + + "- it does not need to be read from DataInput."); + } + + @Override + public void write(DataOutput out) throws IOException { + throw new UnsupportedOperationException("LazyHCatRecord is intended to wrap" + + " an object/object inspector as a HCatRecord " + + "- it does not need to be written to a DataOutput."); + } + + @Override + public Object get(String fieldName, HCatSchema recordSchema) throws HCatException { + int idx = recordSchema.getPosition(fieldName); + return get(idx); + } + + @Override + public void set(String fieldName, HCatSchema recordSchema, Object value) throws HCatException { + throw new UnsupportedOperationException("not allowed to run set() on LazyHCatRecord"); + } + + @Override + public void remove(int idx) throws HCatException { + throw new UnsupportedOperationException("not allowed to run remove() on LazyHCatRecord"); + } + + @Override + public void copy(HCatRecord r) throws HCatException { + throw new UnsupportedOperationException("not allowed to run copy() on LazyHCatRecord"); + } + + public LazyHCatRecord(Object wrappedObject, ObjectInspector oi) throws Exception { + if (oi.getCategory() != Category.STRUCT) { + throw new SerDeException(getClass().toString() + + " can only make a lazy hcat record from " + + "objects of struct types, but we got: " + oi.getTypeName()); + } + + this.soi = (StructObjectInspector)oi; + this.wrappedObject = wrappedObject; + } + + @Override + public String toString(){ + StringBuilder sb = new StringBuilder(); + for(int i = 0; i< size() ; i++) { + sb.append(get(i)+"\t"); + } + return sb.toString(); + } + + /** + * Convert this LazyHCatRecord to a DefaultHCatRecord. This is required + * before you can write out a record via write. 
+ * @return an HCatRecord that can be serialized + * @throws HCatException + */ + public HCatRecord getWritable() throws HCatException { + DefaultHCatRecord d = new DefaultHCatRecord(); + d.copy(this); + return d; + } +} diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/Pair.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/Pair.java new file mode 100644 index 0000000..c71157b --- /dev/null +++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/Pair.java @@ -0,0 +1,89 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.data; + +import java.io.Serializable; + +/** + * Copy of C++ STL pair container. + */ +public class Pair implements Serializable { + + private static final long serialVersionUID = 1L; + public T first; + public U second; + + /** + * @param f First element in pair. + * @param s Second element in pair. + */ + public Pair(T f, U s) { + first = f; + second = s; + } + + /* (non-Javadoc) + * @see java.lang.Object#toString() + */ + @Override + public String toString() { + return "[" + first.toString() + "," + second.toString() + "]"; + } + + @Override + public int hashCode() { + return (((this.first == null ? 
1 : this.first.hashCode()) * 17) + + (this.second == null ? 1 : this.second.hashCode()) * 19); + } + + @Override + public boolean equals(Object other) { + if (other == null) { + return false; + } + + if (!(other instanceof Pair)) { + return false; + } + + Pair otherPair = (Pair) other; + + if (this.first == null) { + if (otherPair.first != null) { + return false; + } else { + return true; + } + } + + if (this.second == null) { + if (otherPair.second != null) { + return false; + } else { + return true; + } + } + + if (this.first.equals(otherPair.first) && this.second.equals(otherPair.second)) { + return true; + } else { + return false; + } + } +} diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/ReaderWriter.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/ReaderWriter.java new file mode 100644 index 0000000..53e440b --- /dev/null +++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/ReaderWriter.java @@ -0,0 +1,192 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hive.hcatalog.data; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; + +import org.apache.hadoop.io.VIntWritable; +import org.apache.hadoop.io.VLongWritable; + + +public abstract class ReaderWriter { + + private static final String UTF8 = "UTF-8"; + + public static Object readDatum(DataInput in) throws IOException { + + byte type = in.readByte(); + switch (type) { + + case DataType.STRING: + byte[] buffer = new byte[in.readInt()]; + in.readFully(buffer); + return new String(buffer, UTF8); + + case DataType.INTEGER: + VIntWritable vint = new VIntWritable(); + vint.readFields(in); + return vint.get(); + + case DataType.LONG: + VLongWritable vlong = new VLongWritable(); + vlong.readFields(in); + return vlong.get(); + + case DataType.FLOAT: + return in.readFloat(); + + case DataType.DOUBLE: + return in.readDouble(); + + case DataType.BOOLEAN: + return in.readBoolean(); + + case DataType.BYTE: + return in.readByte(); + + case DataType.SHORT: + return in.readShort(); + + case DataType.NULL: + return null; + + case DataType.BINARY: + int len = in.readInt(); + byte[] ba = new byte[len]; + in.readFully(ba); + return ba; + + case DataType.MAP: + int size = in.readInt(); + Map m = new HashMap(size); + for (int i = 0; i < size; i++) { + m.put(readDatum(in), readDatum(in)); + } + return m; + + case DataType.LIST: + int sz = in.readInt(); + List list = new ArrayList(sz); + for (int i = 0; i < sz; i++) { + list.add(readDatum(in)); + } + return list; + + default: + throw new IOException("Unexpected data type " + type + + " found in stream."); + } + } + + public static void writeDatum(DataOutput out, Object val) throws IOException { + // write the data type + byte type = DataType.findType(val); + switch (type) { + case DataType.LIST: + 
out.writeByte(DataType.LIST); + List list = (List) val; + int sz = list.size(); + out.writeInt(sz); + for (int i = 0; i < sz; i++) { + writeDatum(out, list.get(i)); + } + return; + + case DataType.MAP: + out.writeByte(DataType.MAP); + Map m = (Map) val; + out.writeInt(m.size()); + Iterator i = + m.entrySet().iterator(); + while (i.hasNext()) { + Entry entry = (Entry) i.next(); + writeDatum(out, entry.getKey()); + writeDatum(out, entry.getValue()); + } + return; + + case DataType.INTEGER: + out.writeByte(DataType.INTEGER); + new VIntWritable((Integer) val).write(out); + return; + + case DataType.LONG: + out.writeByte(DataType.LONG); + new VLongWritable((Long) val).write(out); + return; + + case DataType.FLOAT: + out.writeByte(DataType.FLOAT); + out.writeFloat((Float) val); + return; + + case DataType.DOUBLE: + out.writeByte(DataType.DOUBLE); + out.writeDouble((Double) val); + return; + + case DataType.BOOLEAN: + out.writeByte(DataType.BOOLEAN); + out.writeBoolean((Boolean) val); + return; + + case DataType.BYTE: + out.writeByte(DataType.BYTE); + out.writeByte((Byte) val); + return; + + case DataType.SHORT: + out.writeByte(DataType.SHORT); + out.writeShort((Short) val); + return; + + case DataType.STRING: + String s = (String) val; + byte[] utfBytes = s.getBytes(ReaderWriter.UTF8); + out.writeByte(DataType.STRING); + out.writeInt(utfBytes.length); + out.write(utfBytes); + return; + + case DataType.BINARY: + byte[] ba = (byte[]) val; + out.writeByte(DataType.BINARY); + out.writeInt(ba.length); + out.write(ba); + return; + + case DataType.NULL: + out.writeByte(DataType.NULL); + return; + + default: + throw new IOException("Unexpected data type " + type + + " found in stream."); + } + } +} diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/schema/HCatFieldSchema.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/schema/HCatFieldSchema.java new file mode 100644 index 0000000..2b06469 --- /dev/null +++ 
hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/schema/HCatFieldSchema.java
@@ -0,0 +1,292 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hive.hcatalog.data.schema;
+
+import java.io.Serializable;
+import java.util.Locale;
+
+import org.apache.commons.lang.builder.ToStringBuilder;
+import org.apache.hive.hcatalog.common.HCatException;
+
+/**
+ * Schema of a single field: its name, comment, type and, for complex types,
+ * the schema of its contents.
+ */
+public class HCatFieldSchema implements Serializable {
+
+  private static final long serialVersionUID = 1L;
+
+  public enum Type {
+    INT,
+    TINYINT,
+    SMALLINT,
+    BIGINT,
+    BOOLEAN,
+    FLOAT,
+    DOUBLE,
+    STRING,
+    ARRAY,
+    MAP,
+    STRUCT,
+    BINARY,
+  }
+
+  public enum Category {
+    PRIMITIVE,
+    ARRAY,
+    MAP,
+    STRUCT;
+
+    /**
+     * Maps a type to its category. Anything that is not ARRAY, STRUCT or MAP
+     * (including null) is reported as PRIMITIVE.
+     */
+    public static Category fromType(Type type) {
+      if (type == null) {
+        // switching on null would throw; null is treated as PRIMITIVE
+        return PRIMITIVE;
+      }
+      switch (type) {
+      case ARRAY:
+        return ARRAY;
+      case STRUCT:
+        return STRUCT;
+      case MAP:
+        return MAP;
+      default:
+        return PRIMITIVE;
+      }
+    }
+  }
+
+  String fieldName;
+  String comment;
+  Type type;
+  Category category;
+
+  // Populated if column is struct, array or map types.
+  // If struct type, contains schema of the struct.
+  // If array type, contains schema of one of the elements.
+  // If map type, contains schema of the value element.
+  HCatSchema subSchema;
+
+  // populated if column is Map type
+  Type mapKeyType;
+
+  // lazily computed by getTypeString()
+  private String typeString;
+
+  @SuppressWarnings("unused")
+  private HCatFieldSchema() {
+    // preventing empty ctor from being callable
+  }
+
+  /**
+   * Constructor constructing a primitive datatype HCatFieldSchema
+   * @param fieldName Name of the primitive field
+   * @param type Type of the primitive field
+   * @param comment comment attached to the field, may be null
+   * @throws HCatException if call made on non-primitive types
+   */
+  public HCatFieldSchema(String fieldName, Type type, String comment) throws HCatException {
+    assertTypeInCategory(type, Category.PRIMITIVE, fieldName);
+    this.fieldName = fieldName;
+    this.type = type;
+    this.category = Category.PRIMITIVE;
+    this.comment = comment;
+  }
+
+  /**
+   * Constructor for constructing a ARRAY type or STRUCT type HCatFieldSchema, passing type and subschema
+   * @param fieldName Name of the array or struct field
+   * @param type Type of the field - either Type.ARRAY or Type.STRUCT
+   * @param subSchema - subschema of the struct, or element schema of the elements in the array
+   * @param comment comment attached to the field, may be null
+   * @throws HCatException if call made on Primitive or Map types
+   */
+  public HCatFieldSchema(String fieldName, Type type, HCatSchema subSchema, String comment) throws HCatException {
+    assertTypeNotInCategory(type, Category.PRIMITIVE);
+    assertTypeNotInCategory(type, Category.MAP);
+    this.fieldName = fieldName;
+    this.type = type;
+    this.category = Category.fromType(type);
+    this.subSchema = subSchema;
+    if (type == Type.ARRAY) {
+      // the single element field of an array is stored without a name
+      this.subSchema.get(0).setName(null);
+    }
+    this.comment = comment;
+  }
+
+  /**
+   * Constructor for constructing a MAP type HCatFieldSchema, passing type of key and value
+   * @param fieldName Name of the map field
+   * @param type Type of the field - must be Type.MAP
+   * @param mapKeyType - key type of the Map
+   * @param mapValueSchema - subschema of the value of the Map
+   * @param comment comment attached to the field, may be null
+   * @throws HCatException if call made on non-Map types
+   */
+  public HCatFieldSchema(String fieldName, Type type, Type mapKeyType, HCatSchema mapValueSchema, String comment) throws HCatException {
+    assertTypeInCategory(type, Category.MAP, fieldName);
+    assertTypeInCategory(mapKeyType, Category.PRIMITIVE, fieldName);
+    this.fieldName = fieldName;
+    this.type = Type.MAP;
+    this.category = Category.MAP;
+    this.mapKeyType = mapKeyType;
+    this.subSchema = mapValueSchema;
+    // the value field of a map is stored without a name
+    this.subSchema.get(0).setName(null);
+    this.comment = comment;
+  }
+
+  /**
+   * @return true unless this field is in the PRIMITIVE category
+   */
+  public boolean isComplex() {
+    return category != Category.PRIMITIVE;
+  }
+
+  /**
+   * Returns type of the field
+   * @return type of the field
+   */
+  public Type getType() {
+    return type;
+  }
+
+  /**
+   * Returns category of the field
+   * @return category of the field
+   */
+  public Category getCategory() {
+    return category;
+  }
+
+  /**
+   * Returns name of the field
+   * @return name of the field
+   */
+  public String getName() {
+    return fieldName;
+  }
+
+  /**
+   * Returns comment of the field
+   * @return comment of the field, may be null
+   */
+  public String getComment() {
+    return comment;
+  }
+
+  private void setName(String name) {
+    this.fieldName = name;
+  }
+
+  public HCatSchema getStructSubSchema() throws HCatException {
+    assertTypeInCategory(this.type, Category.STRUCT, this.fieldName);
+    return subSchema;
+  }
+
+  public HCatSchema getArrayElementSchema() throws HCatException {
+    assertTypeInCategory(this.type, Category.ARRAY, this.fieldName);
+    return subSchema;
+  }
+
+  public Type getMapKeyType() throws HCatException {
+    assertTypeInCategory(this.type, Category.MAP, this.fieldName);
+    return mapKeyType;
+  }
+
+  public HCatSchema getMapValueSchema() throws HCatException {
+    assertTypeInCategory(this.type, Category.MAP, this.fieldName);
+    return subSchema;
+  }
+
+  private static void assertTypeInCategory(Type type, Category category, String fieldName) throws HCatException {
+    Category typeCategory = Category.fromType(type);
+    if (typeCategory != category) {
+      throw new HCatException("Type category mismatch. Expected " + category + " but type " + type + " in category " + typeCategory + " (field " + fieldName + ")");
+    }
+  }
+
+  private static void assertTypeNotInCategory(Type type, Category category) throws HCatException {
+    Category typeCategory = Category.fromType(type);
+    if (typeCategory == category) {
+      throw new HCatException("Type category mismatch. Expected type " + type + " not in category " + category + " but was so.");
+    }
+  }
+
+  @Override
+  public String toString() {
+    return new ToStringBuilder(this)
+      .append("fieldName", fieldName)
+      .append("comment", comment)
+      .append("type", getTypeString())
+      .append("category", category)
+      .toString();
+  }
+
+  /**
+   * Returns the lower-case type string of this field, e.g. "int",
+   * "array<...>" or "map<key,...>". The value is computed on first use and
+   * cached.
+   * @return lower-case type string
+   */
+  public String getTypeString() {
+    if (typeString == null) {
+      String rendered;
+      if (Category.PRIMITIVE == category) {
+        rendered = String.valueOf(type);
+      } else if (Category.STRUCT == category) {
+        rendered = "struct<" + subSchema.getSchemaAsTypeString() + ">";
+      } else if (Category.ARRAY == category) {
+        rendered = "array<" + subSchema.getSchemaAsTypeString() + ">";
+      } else if (Category.MAP == category) {
+        rendered = "map<" + mapKeyType + "," + subSchema.getSchemaAsTypeString() + ">";
+      } else {
+        rendered = "";
+      }
+      // Locale.ROOT: the default-locale toLowerCase() maps 'I' to a dotless
+      // 'i' under e.g. a Turkish locale, which would corrupt names like INT.
+      typeString = rendered.toLowerCase(Locale.ROOT);
+    }
+    return typeString;
+  }
+
+  /**
+   * Two field schemas are equal when category, name and type string match.
+   * The comment is not compared.
+   */
+  @Override
+  public boolean equals(Object obj) {
+    if (this == obj) {
+      return true;
+    }
+    if (!(obj instanceof HCatFieldSchema)) {
+      // also covers obj == null
+      return false;
+    }
+    HCatFieldSchema that = (HCatFieldSchema) obj;
+    if (category != that.category) {
+      return false;
+    }
+    if (fieldName == null ? that.fieldName != null : !fieldName.equals(that.fieldName)) {
+      return false;
+    }
+    String mine = this.getTypeString();
+    String theirs = that.getTypeString();
+    return mine == null ? theirs == null : mine.equals(theirs);
+  }
+
+  @Override
+  public int hashCode() {
+    //result could be cached if this object were to be made immutable...
+    String ts = getTypeString();
+    int result = 17;
+    result = 31 * result + (category == null ? 0 : category.hashCode());
+    result = 31 * result + (fieldName == null ? 0 : fieldName.hashCode());
+    result = 31 * result + (ts == null ? 0 : ts.hashCode());
+    return result;
+  }
+}
diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/schema/HCatSchema.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/schema/HCatSchema.java
new file mode 100644
index 0000000..e66b1c9
--- /dev/null
+++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/schema/HCatSchema.java
@@ -0,0 +1,185 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hive.hcatalog.data.schema;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hive.hcatalog.common.HCatException;
+
+/**
+ * HCatSchema. This class is NOT thread-safe.
+ */
+
+public class HCatSchema implements Serializable {
+
+  private static final long serialVersionUID = 1L;
+
+  private final List<HCatFieldSchema> fieldSchemas;
+  //HCatFieldSchema.getName()->position
+  private final Map<String, Integer> fieldPositionMap;
+  private final List<String> fieldNames;
+
+  /**
+   *
+   * @param fieldSchemas is copied by HCatSchema. Any subsequent modifications
+   * on fieldSchemas won't get reflected in HCatSchema. Each fieldSchema's name
+   * in the list must be unique, otherwise throws IllegalArgumentException.
+   */
+  public HCatSchema(final List<HCatFieldSchema> fieldSchemas) {
+    this.fieldSchemas = new ArrayList<HCatFieldSchema>(fieldSchemas);
+    fieldPositionMap = new HashMap<String, Integer>();
+    fieldNames = new ArrayList<String>();
+    int idx = 0;
+    for (HCatFieldSchema field : fieldSchemas) {
+      if (field == null) {
+        throw new IllegalArgumentException("Field cannot be null");
+      }
+
+      String fieldName = field.getName();
+      if (fieldPositionMap.containsKey(fieldName)) {
+        throw new IllegalArgumentException("Field named " + fieldName +
+          " already exists");
+      }
+      fieldPositionMap.put(fieldName, idx);
+      fieldNames.add(fieldName);
+      idx++;
+    }
+  }
+
+  /**
+   * Appends a field at the end of the schema.
+   * @param hfs field to append
+   * @throws HCatException if hfs is null or a field with the same name exists
+   */
+  public void append(final HCatFieldSchema hfs) throws HCatException {
+    if (hfs == null) {
+      throw new HCatException("Attempt to append null HCatFieldSchema in HCatSchema.");
+    }
+
+    String fieldName = hfs.getName();
+    if (fieldPositionMap.containsKey(fieldName)) {
+      throw new HCatException("Attempt to append HCatFieldSchema with already " +
+        "existing name: " + fieldName + ".");
+    }
+
+    this.fieldSchemas.add(hfs);
+    this.fieldNames.add(fieldName);
+    this.fieldPositionMap.put(fieldName, this.size() - 1);
+  }
+
+  /**
+   * Users are not allowed to modify the list directly, since HCatSchema
+   * maintains internal state. Use append/remove to modify the schema.
+   */
+  public List<HCatFieldSchema> getFields() {
+    return Collections.unmodifiableList(this.fieldSchemas);
+  }
+
+  /**
+   * @param fieldName
+   * @return the index of field named fieldName in Schema. If field is not
+   * present, returns null.
+   */
+  public Integer getPosition(String fieldName) {
+    return fieldPositionMap.get(fieldName);
+  }
+
+  /**
+   * @param fieldName name of the field to look up
+   * @return the field registered under fieldName
+   * @throws HCatException if no field is registered under fieldName
+   */
+  public HCatFieldSchema get(String fieldName) throws HCatException {
+    Integer position = getPosition(fieldName);
+    if (position == null) {
+      // previously this surfaced as a NullPointerException while unboxing
+      throw new HCatException("Attempt to get a non-existent column from HCat Schema: " + fieldName);
+    }
+    return get(position.intValue());
+  }
+
+  public List<String> getFieldNames() {
+    return this.fieldNames;
+  }
+
+  public HCatFieldSchema get(int position) {
+    return fieldSchemas.get(position);
+  }
+
+  public int size() {
+    return fieldSchemas.size();
+  }
+
+  /**
+   * Removes a field and shifts the recorded positions of all fields that
+   * followed it, so getPosition() keeps matching the order of getFields().
+   * @param hcatFieldSchema field to remove
+   * @throws HCatException if the field is not part of this schema
+   */
+  public void remove(final HCatFieldSchema hcatFieldSchema) throws HCatException {
+
+    if (!fieldSchemas.contains(hcatFieldSchema)) {
+      throw new HCatException("Attempt to delete a non-existent column from HCat Schema: " + hcatFieldSchema);
+    }
+
+    fieldSchemas.remove(hcatFieldSchema);
+    String fieldName = hcatFieldSchema.getName();
+    Integer removedPosition = fieldPositionMap.remove(fieldName);
+    fieldNames.remove(fieldName);
+
+    if (removedPosition != null) {
+      // every field that sat behind the removed one moved up by one slot
+      for (Map.Entry<String, Integer> entry : fieldPositionMap.entrySet()) {
+        if (entry.getValue() > removedPosition) {
+          entry.setValue(entry.getValue() - 1);
+        }
+      }
+    }
+  }
+
+  @Override
+  public String toString() {
+    boolean first = true;
+    StringBuilder sb = new StringBuilder();
+    for (HCatFieldSchema hfs : fieldSchemas) {
+      if (!first) {
+        sb.append(",");
+      } else {
+        first = false;
+      }
+      if (hfs.getName() != null) {
+        sb.append(hfs.getName());
+        sb.append(":");
+      }
+      sb.append(hfs.toString());
+    }
+    return sb.toString();
+  }
+
+  /**
+   * @return comma separated "name:type" entries (just "type" for unnamed
+   * fields), using each field's type string
+   */
+  public String getSchemaAsTypeString() {
+    boolean first = true;
+    StringBuilder sb = new StringBuilder();
+    for (HCatFieldSchema hfs : fieldSchemas) {
+      if (!first) {
+        sb.append(",");
+      } else {
+        first = false;
+      }
+      if (hfs.getName() != null) {
+        sb.append(hfs.getName());
+        sb.append(":");
+      }
+      sb.append(hfs.getTypeString());
+    }
+    return sb.toString();
+  }
+
+  @Override
+  public boolean equals(Object obj) {
+    if (this == obj) {
+      return true;
+    }
+    if (!(obj instanceof HCatSchema)) {
+      // also covers obj == null
+      return false;
+    }
+    HCatSchema other = (HCatSchema) obj;
+    return this.getFields().equals(other.getFields());
+  }
+
+  @Override
+  public int hashCode() {
+    // Derived from the same field list equals() compares. The former
+    // toString()-based hash went through HCatFieldSchema.toString(), so
+    // NOTE(review): with ToStringBuilder's default style that embeds an
+    // identity hash, equal schemas could hash differently - confirm.
+    return fieldSchemas.hashCode();
+  }
+}
diff --git
hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/schema/HCatSchemaUtils.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/schema/HCatSchemaUtils.java
new file mode 100644
index 0000000..e9fdb83
--- /dev/null
+++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/schema/HCatSchemaUtils.java
@@ -0,0 +1,229 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hive.hcatalog.data.schema;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.Schema;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hive.hcatalog.common.HCatException;
+import org.apache.hive.hcatalog.data.schema.HCatFieldSchema.Type;
+
+
+/**
+ * Static helpers converting between Hive metastore/serde type descriptions
+ * and HCatSchema / HCatFieldSchema.
+ */
+public class HCatSchemaUtils {
+
+  public static CollectionBuilder getStructSchemaBuilder() {
+    return new CollectionBuilder();
+  }
+
+  public static CollectionBuilder getListSchemaBuilder() {
+    return new CollectionBuilder();
+  }
+
+  public static MapBuilder getMapSchemaBuilder() {
+    return new MapBuilder();
+  }
+
+
+  public static abstract class HCatSchemaBuilder {
+    public abstract HCatSchema build() throws HCatException;
+  }
+
+  public static class CollectionBuilder extends HCatSchemaBuilder { // for STRUCTS(multiple-add-calls) and LISTS(single-add-call)
+    List<HCatFieldSchema> fieldSchemas = null;
+
+    CollectionBuilder() {
+      fieldSchemas = new ArrayList<HCatFieldSchema>();
+    }
+
+    public CollectionBuilder addField(FieldSchema fieldSchema) throws HCatException {
+      return this.addField(getHCatFieldSchema(fieldSchema));
+    }
+
+    public CollectionBuilder addField(HCatFieldSchema fieldColumnSchema) {
+      fieldSchemas.add(fieldColumnSchema);
+      return this;
+    }
+
+    @Override
+    public HCatSchema build() throws HCatException {
+      return new HCatSchema(fieldSchemas);
+    }
+
+  }
+
+  public static class MapBuilder extends HCatSchemaBuilder {
+
+    Type keyType = null;
+    HCatSchema valueSchema = null;
+
+    @Override
+    public HCatSchema build() throws HCatException {
+      List<HCatFieldSchema> fslist = new ArrayList<HCatFieldSchema>();
+      fslist.add(new HCatFieldSchema(null, Type.MAP, keyType, valueSchema, null));
+      return new HCatSchema(fslist);
+    }
+
+    public MapBuilder withValueSchema(HCatSchema valueSchema) {
+      this.valueSchema = valueSchema;
+      return this;
+    }
+
+    public MapBuilder withKeyType(Type keyType) {
+      this.keyType = keyType;
+      return this;
+    }
+
+  }
+
+
+  /**
+   * Convert a FieldSchema to a HCatFieldSchema. The column comment of the
+   * FieldSchema is carried over.
+   * @param fs FieldSchema to convert
+   * @return HCatFieldSchema representation of FieldSchema
+   * @throws HCatException
+   */
+  public static HCatFieldSchema getHCatFieldSchema(FieldSchema fs) throws HCatException {
+    String fieldName = fs.getName();
+    TypeInfo baseTypeInfo = TypeInfoUtils.getTypeInfoFromTypeString(fs.getType());
+    return getHCatFieldSchema(fieldName, baseTypeInfo, fs.getComment());
+  }
+
+  // Variant for fields that have no comment of their own (struct members, list elements).
+  private static HCatFieldSchema getHCatFieldSchema(String fieldName, TypeInfo fieldTypeInfo) throws HCatException {
+    return getHCatFieldSchema(fieldName, fieldTypeInfo, null);
+  }
+
+  // Builds the field schema matching the serde category of fieldTypeInfo.
+  private static HCatFieldSchema getHCatFieldSchema(String fieldName, TypeInfo fieldTypeInfo, String comment) throws HCatException {
+    Category typeCategory = fieldTypeInfo.getCategory();
+    if (Category.PRIMITIVE == typeCategory) {
+      return new HCatFieldSchema(fieldName, getPrimitiveHType(fieldTypeInfo), comment);
+    }
+    if (Category.STRUCT == typeCategory) {
+      HCatSchema subSchema = constructHCatSchema((StructTypeInfo) fieldTypeInfo);
+      return new HCatFieldSchema(fieldName, HCatFieldSchema.Type.STRUCT, subSchema, comment);
+    }
+    if (Category.LIST == typeCategory) {
+      HCatSchema subSchema = getHCatSchema(((ListTypeInfo) fieldTypeInfo).getListElementTypeInfo());
+      return new HCatFieldSchema(fieldName, HCatFieldSchema.Type.ARRAY, subSchema, comment);
+    }
+    if (Category.MAP == typeCategory) {
+      MapTypeInfo mapTypeInfo = (MapTypeInfo) fieldTypeInfo;
+      HCatFieldSchema.Type mapKeyType = getPrimitiveHType(mapTypeInfo.getMapKeyTypeInfo());
+      HCatSchema subSchema = getHCatSchema(mapTypeInfo.getMapValueTypeInfo());
+      return new HCatFieldSchema(fieldName, HCatFieldSchema.Type.MAP, mapKeyType, subSchema, comment);
+    }
+    throw new TypeNotPresentException(fieldTypeInfo.getTypeName(), null);
+  }
+
+  // Maps a serde primitive category onto the HCat type; unsupported categories throw.
+  private static Type getPrimitiveHType(TypeInfo basePrimitiveTypeInfo) {
+    PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) basePrimitiveTypeInfo;
+    switch (primitiveTypeInfo.getPrimitiveCategory()) {
+    case BOOLEAN:
+      return Type.BOOLEAN;
+    case BYTE:
+      return Type.TINYINT;
+    case DOUBLE:
+      return Type.DOUBLE;
+    case FLOAT:
+      return Type.FLOAT;
+    case INT:
+      return Type.INT;
+    case LONG:
+      return Type.BIGINT;
+    case SHORT:
+      return Type.SMALLINT;
+    case STRING:
+      return Type.STRING;
+    case BINARY:
+      return Type.BINARY;
+    default:
+      throw new TypeNotPresentException(primitiveTypeInfo.getTypeName(), null);
+    }
+  }
+
+  public static HCatSchema getHCatSchema(Schema schema) throws HCatException {
+    return getHCatSchema(schema.getFieldSchemas());
+  }
+
+  public static HCatSchema getHCatSchema(List<? extends FieldSchema> fslist) throws HCatException {
+    CollectionBuilder builder = getStructSchemaBuilder();
+    for (FieldSchema fieldSchema : fslist) {
+      builder.addField(fieldSchema);
+    }
+    return builder.build();
+  }
+
+  // One HCat field per struct member, in the order the struct declares them.
+  private static HCatSchema constructHCatSchema(StructTypeInfo stypeInfo) throws HCatException {
+    CollectionBuilder builder = getStructSchemaBuilder();
+    for (String fieldName : stypeInfo.getAllStructFieldNames()) {
+      builder.addField(getHCatFieldSchema(fieldName, stypeInfo.getStructFieldTypeInfo(fieldName)));
+    }
+    return builder.build();
+  }
+
+  /**
+   * Wraps the given type into a schema holding exactly one field of that type.
+   * @param typeInfo type to describe
+   * @return single-field schema describing typeInfo
+   * @throws HCatException
+   */
+  public static HCatSchema getHCatSchema(TypeInfo typeInfo) throws HCatException {
+    Category typeCategory = typeInfo.getCategory();
+    HCatSchema hCatSchema;
+    if (Category.PRIMITIVE == typeCategory) {
+      hCatSchema = getStructSchemaBuilder().addField(new HCatFieldSchema(null, getPrimitiveHType(typeInfo), null)).build();
+    } else if (Category.STRUCT == typeCategory) {
+      HCatSchema subSchema = constructHCatSchema((StructTypeInfo) typeInfo);
+      hCatSchema = getStructSchemaBuilder().addField(new HCatFieldSchema(null, Type.STRUCT, subSchema, null)).build();
+    } else if (Category.LIST == typeCategory) {
+      CollectionBuilder builder = getListSchemaBuilder();
+      builder.addField(getHCatFieldSchema(null, ((ListTypeInfo) typeInfo).getListElementTypeInfo()));
+      hCatSchema = new HCatSchema(Arrays.asList(new HCatFieldSchema("", Type.ARRAY, builder.build(), "")));
+    } else if (Category.MAP == typeCategory) {
+      HCatFieldSchema.Type mapKeyType = getPrimitiveHType(((MapTypeInfo) typeInfo).getMapKeyTypeInfo());
+      HCatSchema subSchema = getHCatSchema(((MapTypeInfo) typeInfo).getMapValueTypeInfo());
+      MapBuilder builder = getMapSchemaBuilder();
+      hCatSchema = builder.withKeyType(mapKeyType).withValueSchema(subSchema).build();
+    } else {
+      throw new TypeNotPresentException(typeInfo.getTypeName(), null);
+    }
+    return hCatSchema;
+  }
+
+  public static HCatSchema getHCatSchemaFromTypeString(String typeString) throws HCatException {
+    return getHCatSchema(TypeInfoUtils.getTypeInfoFromTypeString(typeString));
+  }
+
+  /**
+   * Parses a "name:type,name:type" column list by wrapping it in a struct.
+   * @param schemaString column list; null or blank yields an empty schema
+   * @return schema with one field per listed column
+   * @throws HCatException
+   */
+  public static HCatSchema getHCatSchema(String schemaString) throws HCatException {
+    if ((schemaString == null) || (schemaString.trim().isEmpty())) {
+      return new HCatSchema(new ArrayList<HCatFieldSchema>()); // empty HSchema construct
+    }
+    HCatSchema outerSchema = getHCatSchemaFromTypeString("struct<" + schemaString + ">");
+    return outerSchema.get(0).getStructSubSchema();
+  }
+
+  public static FieldSchema getFieldSchema(HCatFieldSchema hcatFieldSchema) {
+    return new FieldSchema(hcatFieldSchema.getName(), hcatFieldSchema.getTypeString(), hcatFieldSchema.getComment());
+  }
+
+  public static List<FieldSchema> getFieldSchemas(List<HCatFieldSchema> hcatFieldSchemas) {
+    List<FieldSchema> lfs = new ArrayList<FieldSchema>();
+    for (HCatFieldSchema hfs : hcatFieldSchemas) {
+      lfs.add(getFieldSchema(hfs));
+    }
+    return lfs;
+  }
+}
diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/DataTransferFactory.java
hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/DataTransferFactory.java
new file mode 100644
index 0000000..5d9d9ef
--- /dev/null
+++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/DataTransferFactory.java
@@ -0,0 +1,136 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hive.hcatalog.data.transfer;
+
+import java.util.Map;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hive.hcatalog.data.transfer.impl.HCatInputFormatReader;
+import org.apache.hive.hcatalog.data.transfer.impl.HCatOutputFormatWriter;
+import org.apache.hive.hcatalog.data.transfer.state.DefaultStateProvider;
+import org.apache.hive.hcatalog.data.transfer.state.StateProvider;
+
+/**
+ * Use this factory to get instances of {@link HCatReader} or {@link HCatWriter}
+ * at master and slave nodes.
+ */
+
+public class DataTransferFactory {
+
+  /**
+   * This should be called once from master node to obtain an instance of
+   * {@link HCatReader}.
+   *
+   * @param re
+   *          ReadEntity built using {@link ReadEntity.Builder}
+   * @param config
+   *          any configuration which master node wants to pass to HCatalog
+   * @return {@link HCatReader}
+   */
+  public static HCatReader getHCatReader(final ReadEntity re,
+    final Map<String, String> config) {
+    // In future, this may examine ReadEntity and/or config to return
+    // appropriate HCatReader
+    return new HCatInputFormatReader(re, config);
+  }
+
+  /**
+   * This should only be called once from every slave node to obtain an instance
+   * of {@link HCatReader}. Uses the state provider returned by
+   * {@link DefaultStateProvider#get()}.
+   *
+   * @param split
+   *          input split obtained at master node
+   * @param config
+   *          configuration obtained at master node
+   * @return {@link HCatReader}
+   */
+  public static HCatReader getHCatReader(final InputSplit split,
+    final Configuration config) {
+    // In future, this may examine config to return appropriate HCatReader
+    return getHCatReader(split, config, DefaultStateProvider.get());
+  }
+
+  /**
+   * This should only be called once from every slave node to obtain an instance
+   * of {@link HCatReader}. This should be called if an external system has some
+   * state to provide to HCatalog.
+   *
+   * @param split
+   *          input split obtained at master node
+   * @param config
+   *          configuration obtained at master node
+   * @param sp
+   *          {@link StateProvider}
+   * @return {@link HCatReader}
+   */
+  public static HCatReader getHCatReader(final InputSplit split,
+    final Configuration config, StateProvider sp) {
+    // In future, this may examine config to return appropriate HCatReader
+    return new HCatInputFormatReader(split, config, sp);
+  }
+
+  /**
+   * This should be called at master node to obtain an instance of
+   * {@link HCatWriter}.
+   *
+   * @param we
+   *          WriteEntity built using {@link WriteEntity.Builder}
+   * @param config
+   *          any configuration which master wants to pass to HCatalog
+   * @return {@link HCatWriter}
+   */
+  public static HCatWriter getHCatWriter(final WriteEntity we,
+    final Map<String, String> config) {
+    // In future, this may examine WriteEntity and/or config to return
+    // appropriate HCatWriter
+    return new HCatOutputFormatWriter(we, config);
+  }
+
+  /**
+   * This should be called at slave nodes to obtain an instance of
+   * {@link HCatWriter}. Uses the state provider returned by
+   * {@link DefaultStateProvider#get()}.
+   *
+   * @param cntxt
+   *          {@link WriterContext} obtained at master node
+   * @return {@link HCatWriter}
+   */
+  public static HCatWriter getHCatWriter(final WriterContext cntxt) {
+    // In future, this may examine context to return appropriate HCatWriter
+    return getHCatWriter(cntxt, DefaultStateProvider.get());
+  }
+
+  /**
+   * This should be called at slave nodes to obtain an instance of
+   * {@link HCatWriter}. If an external system has some mechanism for providing
+   * state to HCatalog, this method can be used.
+   *
+   * @param cntxt
+   *          {@link WriterContext} obtained at master node
+   * @param sp
+   *          {@link StateProvider}
+   * @return {@link HCatWriter}
+   */
+  public static HCatWriter getHCatWriter(final WriterContext cntxt,
+    final StateProvider sp) {
+    // In future, this may examine context to return appropriate HCatWriter
+    return new HCatOutputFormatWriter(cntxt.getConf(), sp);
+  }
+}
diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/EntityBase.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/EntityBase.java
new file mode 100644
index 0000000..65f3c9b
--- /dev/null
+++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/EntityBase.java
@@ -0,0 +1,60 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.
The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hive.hcatalog.data.transfer;
+
+import java.util.Map;
+
+/**
+ * This is a base class for
+ * {@link ReadEntity.Builder} / {@link WriteEntity.Builder}.
+ * Many fields in them are common, so this class
+ * contains the common fields.
+ */
+
+abstract class EntityBase {
+
+  String region;
+  String tableName;
+  String dbName;
+  // partition key -> partition value
+  Map<String, String> partitionKVs;
+
+  /**
+   * Common methods for {@link ReadEntity} and {@link WriteEntity}:
+   * read-only accessors for the fields declared in {@link EntityBase}.
+   */
+
+  abstract static class Entity extends EntityBase {
+
+    public String getRegion() {
+      return region;
+    }
+
+    public String getTableName() {
+      return tableName;
+    }
+
+    public String getDbName() {
+      return dbName;
+    }
+
+    public Map<String, String> getPartitionKVs() {
+      return partitionKVs;
+    }
+  }
+}
diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/HCatReader.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/HCatReader.java
new file mode 100644
index 0000000..8286389
--- /dev/null
+++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/HCatReader.java
@@ -0,0 +1,102 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.
The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hive.hcatalog.data.transfer;
+
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hive.hcatalog.common.HCatException;
+import org.apache.hive.hcatalog.data.HCatRecord;
+import org.apache.hive.hcatalog.data.transfer.state.StateProvider;
+
+/**
+ * This abstract class is internal to HCatalog and abstracts away the notion of
+ * underlying system from which reads will be done.
+ */
+
+public abstract class HCatReader {
+
+  protected ReadEntity re; // This will be null at slaves.
+  protected Configuration conf;
+  protected ReaderContext info;
+  protected StateProvider sp; // This will be null at master.
+
+  /**
+   * This should be called at master node to obtain {@link ReaderContext} which
+   * then should be serialized and sent to slave nodes.
+   *
+   * @return {@link ReaderContext}
+   * @throws HCatException
+   */
+  public abstract ReaderContext prepareRead() throws HCatException;
+
+  /**
+   * This should be called at slave nodes to read {@link HCatRecord}s
+   *
+   * @return {@link Iterator} of {@link HCatRecord}
+   * @throws HCatException
+   */
+  public abstract Iterator<HCatRecord> read() throws HCatException;
+
+  /**
+   * This constructor will be invoked by {@link DataTransferFactory} at master
+   * node. Don't use this constructor. Instead, use {@link DataTransferFactory}
+   *
+   * @param re
+   *          entity describing what to read
+   * @param config
+   *          key/value pairs copied into a new {@link Configuration}; may be
+   *          null
+   */
+  protected HCatReader(final ReadEntity re, final Map<String, String> config) {
+    this(config);
+    this.re = re;
+  }
+
+  /**
+   * This constructor will be invoked by {@link DataTransferFactory} at slave
+   * nodes. Don't use this constructor. Instead, use {@link DataTransferFactory}
+   *
+   * @param config
+   *          configuration used as-is (not copied)
+   * @param sp
+   *          state provider
+   */
+
+  protected HCatReader(final Configuration config, StateProvider sp) {
+    this.conf = config;
+    this.sp = sp;
+  }
+
+  // Builds a fresh Configuration and overlays the supplied entries, if any.
+  private HCatReader(final Map<String, String> config) {
+    Configuration overlaid = new Configuration();
+    if (null != config) {
+      for (Entry<String, String> kv : config.entrySet()) {
+        overlaid.set(kv.getKey(), kv.getValue());
+      }
+    }
+    this.conf = overlaid;
+  }
+
+  /**
+   * @return the configuration of this reader
+   * @throws IllegalStateException
+   *           if no configuration was set during construction
+   */
+  public Configuration getConf() {
+    if (null == conf) {
+      throw new IllegalStateException(
+        "HCatReader is not constructed correctly.");
+    }
+    return conf;
+  }
+}
diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/HCatWriter.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/HCatWriter.java
new file mode 100644
index 0000000..23ab8b6
--- /dev/null
+++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/HCatWriter.java
@@ -0,0 +1,114 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hive.hcatalog.data.transfer;
+
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hive.hcatalog.common.HCatException;
+import org.apache.hive.hcatalog.data.HCatRecord;
+import org.apache.hive.hcatalog.data.transfer.state.StateProvider;
+
+/**
+ * This abstraction is internal to HCatalog. This is to facilitate writing to
+ * HCatalog from external systems. Don't try to instantiate this directly.
+ * Instead, use {@link DataTransferFactory}
+ */
+
+public abstract class HCatWriter {
+
+  protected Configuration conf;
+  protected WriteEntity we; // This will be null at slave nodes.
+  protected WriterContext info;
+  protected StateProvider sp;
+
+  /**
+   * External system should invoke this method exactly once from a master node.
+   *
+   * @return {@link WriterContext} This should be serialized and sent to slave
+   *         nodes to construct HCatWriter there.
+   * @throws HCatException
+   */
+  public abstract WriterContext prepareWrite() throws HCatException;
+
+  /**
+   * This method should be used at slave nodes to perform writes.
+   *
+   * @param recordItr
+   *          {@link Iterator} records to be written into HCatalog.
+   * @throws HCatException
+   */
+  public abstract void write(final Iterator<HCatRecord> recordItr)
+    throws HCatException;
+
+  /**
+   * This method should be called at master node. Primary purpose of this is to
+   * do metadata commit.
+   *
+   * @param context
+   *          writer context of the write being committed
+   * @throws HCatException
+   */
+  public abstract void commit(final WriterContext context) throws HCatException;
+
+  /**
+   * This method should be called at master node. Primary purpose of this is to
+   * do cleanups in case of failures.
+   *
+   * @param context
+   *          writer context of the write being aborted
+   * @throws HCatException
+   */
+  public abstract void abort(final WriterContext context) throws HCatException;
+
+  /**
+   * This constructor will be used at master node
+   *
+   * @param we
+   *          WriteEntity defines where in storage records should be written to.
+   * @param config
+   *          Any configuration which external system wants to communicate to
+   *          HCatalog for performing writes. May be null.
+   */
+  protected HCatWriter(final WriteEntity we, final Map<String, String> config) {
+    this(config);
+    this.we = we;
+  }
+
+  /**
+   * This constructor will be used at slave nodes.
+   *
+   * @param config
+   *          configuration used as-is (not copied)
+   * @param sp
+   *          state provider
+   */
+  protected HCatWriter(final Configuration config, final StateProvider sp) {
+    this.conf = config;
+    this.sp = sp;
+  }
+
+  // Builds a fresh Configuration and overlays the supplied entries, if any.
+  private HCatWriter(final Map<String, String> config) {
+    Configuration overlaid = new Configuration();
+    if (config != null) {
+      // user is providing config, so it could be null.
+      for (Entry<String, String> kv : config.entrySet()) {
+        overlaid.set(kv.getKey(), kv.getValue());
+      }
+    }
+
+    this.conf = overlaid;
+  }
+}
diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/ReadEntity.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/ReadEntity.java
new file mode 100644
index 0000000..b7764ed
--- /dev/null
+++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/ReadEntity.java
@@ -0,0 +1,89 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hive.hcatalog.data.transfer;
+
+import java.util.Map;
+
+/**
+ * Describes what a reader should read: region, database, table and partition
+ * key-values (all held in fields inherited from {@link EntityBase}) plus a
+ * filter string. Instances are created only through {@link ReadEntity.Builder}.
+ */
+public class ReadEntity extends EntityBase.Entity {
+
+  // Filter string copied from the builder; stays null if withFilter() was never called.
+  private String filterString;
+
+  /**
+   * Don't instantiate {@link ReadEntity} directly. Use
+   * {@link ReadEntity.Builder} instead.
+   *
+   */
+  private ReadEntity() {
+    // Not allowed
+  }
+
+  /**
+   * Copies every value collected by the builder into this entity.
+   *
+   * @param builder the builder whose current values are copied
+   */
+  private ReadEntity(Builder builder) {
+
+    this.region = builder.region;
+    this.dbName = builder.dbName;
+    this.tableName = builder.tableName;
+    this.partitionKVs = builder.partitionKVs;
+    this.filterString = builder.filterString;
+  }
+
+  /**
+   * @return the filter string given to the builder, or null if none was set
+   */
+  public String getFilterString() {
+    return this.filterString;
+  }
+
+  /**
+   * This class should be used to build {@link ReadEntity}. It follows the
+   * builder pattern, letting you build your {@link ReadEntity} with whatever
+   * level of detail you want. Every with* method stores its argument and
+   * returns this builder so calls can be chained.
+   *
+   */
+  public static class Builder extends EntityBase {
+
+    private String filterString;
+
+    public Builder withRegion(final String region) {
+      this.region = region;
+      return this;
+    }
+
+    public Builder withDatabase(final String dbName) {
+      this.dbName = dbName;
+      return this;
+    }
+
+    public Builder withTable(final String tblName) {
+      this.tableName = tblName;
+      return this;
+    }
+
+    // The map reference is stored as-is (no defensive copy).
+    public Builder withPartition(final Map partKVs) {
+      this.partitionKVs = partKVs;
+      return this;
+    }
+
+    public Builder withFilter(String filterString) {
+      this.filterString = filterString;
+      return this;
+    }
+
+    /**
+     * @return a new {@link ReadEntity} holding this builder's current values
+     */
+    public ReadEntity build() {
+      return new ReadEntity(this);
+    }
+  }
+}
diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/ReaderContext.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/ReaderContext.java
new file mode 100644
index 0000000..98eeebe
--- /dev/null
+++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/ReaderContext.java
@@ -0,0 +1,89 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hive.hcatalog.data.transfer;
+
+import java.io.Externalizable;
+import java.io.IOException;
+import java.io.ObjectInput;
+import java.io.ObjectOutput;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hive.hcatalog.mapreduce.HCatSplit;
+
+/**
+ * This class will contain information of different {@link InputSplit} obtained
+ * at master node and configuration. This class implements
+ * {@link Externalizable} so it can be serialized using standard java
+ * mechanisms.
+ */
+public class ReaderContext implements Externalizable, Configurable {
+
+  private static final long serialVersionUID = -2656468331739574367L;
+  private List<InputSplit> splits;
+  private Configuration conf;
+
+  /**
+   * Creates a context with no splits and a fresh {@link Configuration}. The
+   * public no-arg constructor is also what {@link Externalizable}
+   * deserialization uses before calling {@link #readExternal(ObjectInput)}.
+   */
+  public ReaderContext() {
+    this.splits = new ArrayList<InputSplit>();
+    this.conf = new Configuration();
+  }
+
+  /**
+   * Replaces the list of splits. The list reference is stored as-is.
+   *
+   * @param splits the splits to carry; they must be {@link HCatSplit}
+   *          instances for {@link #writeExternal(ObjectOutput)} to succeed
+   */
+  public void setInputSplits(final List<InputSplit> splits) {
+    this.splits = splits;
+  }
+
+  /**
+   * @return the list of splits currently held by this context
+   */
+  public List<InputSplit> getSplits() {
+    return splits;
+  }
+
+  @Override
+  public Configuration getConf() {
+    return conf;
+  }
+
+  @Override
+  public void setConf(final Configuration config) {
+    conf = config;
+  }
+
+  /**
+   * Writes the configuration, then the number of splits, then each split.
+   * Every split is cast to {@link HCatSplit}, so any other split type fails
+   * with a ClassCastException.
+   */
+  @Override
+  public void writeExternal(ObjectOutput out) throws IOException {
+    conf.write(out);
+    out.writeInt(splits.size());
+    for (InputSplit split : splits) {
+      ((HCatSplit) split).write(out);
+    }
+  }
+
+  /**
+   * Reads back what {@link #writeExternal(ObjectOutput)} wrote, in the same
+   * order. Splits read are appended to the list already held by this object.
+   */
+  @Override
+  public void readExternal(ObjectInput in) throws IOException,
+    ClassNotFoundException {
+    conf.readFields(in);
+    int numOfSplits = in.readInt();
+    for (int i = 0; i < numOfSplits; i++) {
+      HCatSplit split = new HCatSplit();
+      split.readFields(in);
+      splits.add(split);
+    }
+  }
+}
diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/WriteEntity.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/WriteEntity.java
new file mode 100644
index 0000000..b71f363
--- /dev/null
+++
hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/WriteEntity.java
@@ -0,0 +1,75 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hive.hcatalog.data.transfer;
+
+import java.util.Map;
+
+/**
+ * Describes where a writer should write: region, database, table and
+ * partition key-values, all held in fields inherited from {@link EntityBase}.
+ * Instances are created only through {@link Builder}.
+ */
+public class WriteEntity extends EntityBase.Entity {
+
+  /**
+   * Don't instantiate {@link WriteEntity} directly. Use {@link Builder} to
+   * build {@link WriteEntity}.
+   */
+
+  private WriteEntity() {
+    // Not allowed.
+  }
+
+  /**
+   * Copies every value collected by the builder into this entity.
+   *
+   * @param builder the builder whose current values are copied
+   */
+  private WriteEntity(Builder builder) {
+    this.region = builder.region;
+    this.dbName = builder.dbName;
+    this.tableName = builder.tableName;
+    this.partitionKVs = builder.partitionKVs;
+  }
+
+  /**
+   * This class should be used to build {@link WriteEntity}. It follows the
+   * builder pattern, letting you build your {@link WriteEntity} with whatever
+   * level of detail you want. Every with* method stores its argument and
+   * returns this builder so calls can be chained.
+   *
+   */
+  public static class Builder extends EntityBase {
+
+    public Builder withRegion(final String region) {
+      this.region = region;
+      return this;
+    }
+
+    public Builder withDatabase(final String dbName) {
+      this.dbName = dbName;
+      return this;
+    }
+
+    public Builder withTable(final String tblName) {
+      this.tableName = tblName;
+      return this;
+    }
+
+    // The map reference is stored as-is (no defensive copy).
+    public Builder withPartition(final Map partKVs) {
+      this.partitionKVs = partKVs;
+      return this;
+    }
+
+    /**
+     * @return a new {@link WriteEntity} holding this builder's current values
+     */
+    public WriteEntity build() {
+      return new WriteEntity(this);
+    }
+
+  }
+}
diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/WriterContext.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/WriterContext.java
new file mode 100644
index 0000000..2dbf4ae
--- /dev/null
+++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/WriterContext.java
@@ -0,0 +1,65 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hive.hcatalog.data.transfer;
+
+import java.io.Externalizable;
+import java.io.IOException;
+import java.io.ObjectInput;
+import java.io.ObjectOutput;
+
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+
+/**
+ * Carries the configuration produced on the master node so that slave nodes
+ * can be prepared for writing. It is {@link Externalizable}: the master
+ * serializes it with standard Java mechanisms and makes it available to the
+ * slaves.
+ */
+public class WriterContext implements Externalizable, Configurable {
+
+  private static final long serialVersionUID = -5899374262971611840L;
+
+  // Always starts as a fresh Configuration, so readExternal() has an object to fill.
+  private Configuration conf = new Configuration();
+
+  /**
+   * Public no-arg constructor, as {@link Externalizable} requires.
+   */
+  public WriterContext() {
+  }
+
+  /**
+   * @return the configuration currently held by this context
+   */
+  @Override
+  public Configuration getConf() {
+    return this.conf;
+  }
+
+  /**
+   * Replaces the held configuration with the given one.
+   */
+  @Override
+  public void setConf(final Configuration config) {
+    this.conf = config;
+  }
+
+  /**
+   * Serializes the held configuration to the given stream.
+   */
+  @Override
+  public void writeExternal(ObjectOutput out) throws IOException {
+    this.conf.write(out);
+  }
+
+  /**
+   * Populates the held configuration from the given stream.
+   */
+  @Override
+  public void readExternal(ObjectInput in) throws IOException,
+    ClassNotFoundException {
+    this.conf.readFields(in);
+  }
+}
diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/impl/HCatInputFormatReader.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/impl/HCatInputFormatReader.java
new file mode 100644
index 0000000..bdedd95
--- /dev/null
+++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/impl/HCatInputFormatReader.java
@@ -0,0 +1,137 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hive.hcatalog.data.transfer.impl;
+
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.Map;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.shims.ShimLoader;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.TaskAttemptID;
+import org.apache.hive.hcatalog.common.ErrorType;
+import org.apache.hive.hcatalog.common.HCatException;
+import org.apache.hive.hcatalog.data.HCatRecord;
+import org.apache.hive.hcatalog.data.transfer.HCatReader;
+import org.apache.hive.hcatalog.data.transfer.ReadEntity;
+import org.apache.hive.hcatalog.data.transfer.ReaderContext;
+import org.apache.hive.hcatalog.data.transfer.state.StateProvider;
+import org.apache.hive.hcatalog.mapreduce.HCatInputFormat;
+
+/**
+ * This reader reads via {@link HCatInputFormat}
+ *
+ */
+public class HCatInputFormatReader extends HCatReader {
+
+  // Split to read; only set by the split-taking constructor.
+  private InputSplit split;
+
+  /**
+   * Creates a reader for one split.
+   *
+   * @param split the split whose records {@link #read()} iterates over
+   * @param config configuration used to create the task attempt context
+   * @param sp state provider handed to the base class
+   */
+  public HCatInputFormatReader(InputSplit split, Configuration config,
+    StateProvider sp) {
+    super(config, sp);
+    this.split = split;
+  }
+
+  /**
+   * Creates a reader that can compute splits via {@link #prepareRead()}.
+   *
+   * @param info entity naming the database, table and filter to read
+   * @param config key/value settings handed to the base class
+   */
+  public HCatInputFormatReader(ReadEntity info, Map<String, String> config) {
+    super(info, config);
+  }
+
+  /**
+   * Configures {@link HCatInputFormat} for the entity's database, table and
+   * filter, computes the input splits and returns them together with the job
+   * configuration.
+   *
+   * @throws HCatException with {@link ErrorType#ERROR_NOT_INITIALIZED} if
+   *           setting up the input or computing splits fails
+   */
+  @Override
+  public ReaderContext prepareRead() throws HCatException {
+    try {
+      Job job = new Job(conf);
+      HCatInputFormat hcif = HCatInputFormat.setInput(
+        job, re.getDbName(), re.getTableName()).setFilter(re.getFilterString());
+      ReaderContext cntxt = new ReaderContext();
+      cntxt.setInputSplits(hcif.getSplits(
+        ShimLoader.getHadoopShims().getHCatShim().createJobContext(job.getConfiguration(), null)));
+      cntxt.setConf(job.getConfiguration());
+      return cntxt;
+    } catch (IOException e) {
+      throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e);
+    } catch (InterruptedException e) {
+      throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e);
+    }
+  }
+
+  /**
+   * Opens a record reader on this reader's split and exposes it as an
+   * iterator. The underlying reader is closed when the iterator reports that
+   * no records are left.
+   *
+   * @throws HCatException with {@link ErrorType#ERROR_NOT_INITIALIZED} if the
+   *           record reader cannot be created or initialized
+   */
+  @Override
+  public Iterator<HCatRecord> read() throws HCatException {
+
+    HCatInputFormat inpFmt = new HCatInputFormat();
+    RecordReader<WritableComparable, HCatRecord> rr;
+    try {
+      TaskAttemptContext cntxt = ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(conf, new TaskAttemptID());
+      rr = inpFmt.createRecordReader(split, cntxt);
+      rr.initialize(split, cntxt);
+    } catch (IOException e) {
+      throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e);
+    } catch (InterruptedException e) {
+      throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e);
+    }
+    return new HCatRecordItr(rr);
+  }
+
+  /**
+   * Iterator view over a {@link RecordReader}. I/O and interruption failures
+   * are rethrown as {@link RuntimeException} because {@link Iterator} methods
+   * cannot throw checked exceptions.
+   */
+  private static class HCatRecordItr implements Iterator<HCatRecord> {
+
+    private final RecordReader<WritableComparable, HCatRecord> curRecReader;
+    // True when the reader is positioned on a record next() has not returned yet.
+    private boolean recordPending;
+    // True once the reader reported end of input; it is closed at that point.
+    private boolean exhausted;
+
+    HCatRecordItr(RecordReader<WritableComparable, HCatRecord> rr) {
+      curRecReader = rr;
+    }
+
+    /**
+     * Advances the reader at most once per record, so repeated calls without
+     * an intervening {@link #next()} do not skip records, and calls after the
+     * end of input do not touch the closed reader.
+     */
+    @Override
+    public boolean hasNext() {
+      if (exhausted) {
+        return false;
+      }
+      if (recordPending) {
+        return true;
+      }
+      try {
+        if (curRecReader.nextKeyValue()) {
+          recordPending = true;
+          return true;
+        }
+        // No more records: remember that first, then close the recordReader.
+        exhausted = true;
+        curRecReader.close();
+        return false;
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      } catch (InterruptedException e) {
+        throw new RuntimeException(e);
+      }
+    }
+
+    /**
+     * @return the record the reader is currently positioned on
+     * @throws java.util.NoSuchElementException if no record is left
+     */
+    @Override
+    public HCatRecord next() {
+      if (!hasNext()) {
+        throw new java.util.NoSuchElementException();
+      }
+      // Consume the pending record so the following hasNext() advances again.
+      recordPending = false;
+      try {
+        return curRecReader.getCurrentValue();
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      } catch (InterruptedException e) {
+        throw new RuntimeException(e);
+      }
+    }
+
+    @Override
+    public void remove() {
+      throw new UnsupportedOperationException("Not allowed");
+    }
+  }
+}
diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/impl/HCatOutputFormatWriter.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/impl/HCatOutputFormatWriter.java
new file mode 100644
index 0000000..63379d6
--- /dev/null
+++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/impl/HCatOutputFormatWriter.java
@@ -0,0 +1,162 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hive.hcatalog.data.transfer.impl;
+
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.Map;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.shims.ShimLoader;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.JobStatus.State;
+import org.apache.hadoop.mapreduce.OutputCommitter;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.TaskAttemptID;
+import org.apache.hive.hcatalog.common.ErrorType;
+import org.apache.hive.hcatalog.common.HCatException;
+import org.apache.hive.hcatalog.data.HCatRecord;
+import org.apache.hive.hcatalog.data.transfer.HCatWriter;
+import org.apache.hive.hcatalog.data.transfer.WriteEntity;
+import org.apache.hive.hcatalog.data.transfer.WriterContext;
+import org.apache.hive.hcatalog.data.transfer.state.StateProvider;
+import org.apache.hive.hcatalog.mapreduce.HCatOutputFormat;
+import org.apache.hive.hcatalog.mapreduce.OutputJobInfo;
+
+/**
+ * This writer writes via {@link HCatOutputFormat}
+ *
+ */
+public class HCatOutputFormatWriter extends HCatWriter {
+
+  /**
+   * Creates a writer that can set up the job via {@link #prepareWrite()}.
+   *
+   * @param we entity naming the database, table and partition to write to
+   * @param config key/value settings handed to the base class
+   */
+  public HCatOutputFormatWriter(WriteEntity we, Map<String, String> config) {
+    super(we, config);
+  }
+
+  /**
+   * Creates a writer that can write records via {@link #write(Iterator)}.
+   *
+   * @param config configuration used for the task attempt
+   * @param sp state provider supplying the id used for the task attempt
+   */
+  public HCatOutputFormatWriter(Configuration config, StateProvider sp) {
+    super(config, sp);
+  }
+
+  /**
+   * Configures {@link HCatOutputFormat} for the target table, checks the
+   * output specs, runs the committer's job setup and returns a context
+   * carrying the resulting job configuration.
+   *
+   * @throws HCatException with {@link ErrorType#ERROR_NOT_INITIALIZED} if any
+   *           of those steps fails
+   */
+  @Override
+  public WriterContext prepareWrite() throws HCatException {
+    OutputJobInfo jobInfo = OutputJobInfo.create(we.getDbName(),
+      we.getTableName(), we.getPartitionKVs());
+    Job job;
+    try {
+      job = new Job(conf);
+      HCatOutputFormat.setOutput(job, jobInfo);
+      HCatOutputFormat.setSchema(job, HCatOutputFormat.getTableSchema(job));
+      HCatOutputFormat outFormat = new HCatOutputFormat();
+      outFormat.checkOutputSpecs(job);
+      outFormat.getOutputCommitter(ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(
+        job.getConfiguration(), ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptID())).setupJob(job);
+    } catch (IOException e) {
+      throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e);
+    } catch (InterruptedException e) {
+      throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e);
+    }
+    WriterContext cntxt = new WriterContext();
+    cntxt.setConf(job.getConfiguration());
+    return cntxt;
+  }
+
+  /**
+   * Writes all records of the iterator as one task attempt: sets up the task,
+   * writes every record with a null key, closes the record writer and commits
+   * the task if the committer asks for it. On failure the task is aborted
+   * before the exception is rethrown.
+   *
+   * @param recordItr records to write
+   * @throws HCatException if writing fails, or if aborting the task after a
+   *           failed write fails as well
+   */
+  @Override
+  public void write(Iterator<HCatRecord> recordItr) throws HCatException {
+
+    int id = sp.getId();
+    setVarsInConf(id);
+    HCatOutputFormat outFormat = new HCatOutputFormat();
+    TaskAttemptContext cntxt = ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(
+      conf, new TaskAttemptID(ShimLoader.getHadoopShims().getHCatShim().createTaskID(), id));
+    OutputCommitter committer = null;
+    RecordWriter<WritableComparable<?>, HCatRecord> writer;
+    try {
+      committer = outFormat.getOutputCommitter(cntxt);
+      committer.setupTask(cntxt);
+      writer = outFormat.getRecordWriter(cntxt);
+      while (recordItr.hasNext()) {
+        HCatRecord rec = recordItr.next();
+        writer.write(null, rec);
+      }
+      writer.close(cntxt);
+      if (committer.needsTaskCommit(cntxt)) {
+        committer.commitTask(cntxt);
+      }
+    } catch (IOException e) {
+      abortTaskAfterFailure(committer, cntxt);
+      throw new HCatException("Failed while writing", e);
+    } catch (InterruptedException e) {
+      abortTaskAfterFailure(committer, cntxt);
+      throw new HCatException("Failed while writing", e);
+    }
+  }
+
+  /**
+   * Aborts the task attempt after a failed write.
+   *
+   * @param committer committer of the failed attempt; null if the failure
+   *          happened before a committer was obtained, in which case nothing
+   *          is done
+   * @param cntxt context of the failed attempt
+   * @throws HCatException with {@link ErrorType#ERROR_INTERNAL_EXCEPTION} if
+   *           the abort itself fails
+   */
+  private void abortTaskAfterFailure(OutputCommitter committer,
+    TaskAttemptContext cntxt) throws HCatException {
+    if (null != committer) {
+      try {
+        committer.abortTask(cntxt);
+      } catch (IOException e1) {
+        throw new HCatException(ErrorType.ERROR_INTERNAL_EXCEPTION, e1);
+      }
+    }
+  }
+
+  /**
+   * Commits the job using a committer built from the context's configuration.
+   *
+   * @throws HCatException with {@link ErrorType#ERROR_NOT_INITIALIZED} if the
+   *           commit fails
+   */
+  @Override
+  public void commit(WriterContext context) throws HCatException {
+    try {
+      new HCatOutputFormat().getOutputCommitter(ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(
+        context.getConf(), ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptID()))
+        .commitJob(ShimLoader.getHadoopShims().getHCatShim().createJobContext(context.getConf(), null));
+    } catch (IOException e) {
+      throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e);
+    } catch (InterruptedException e) {
+      throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e);
+    }
+  }
+
+  /**
+   * Aborts the job, reporting {@link State#FAILED}, using a committer built
+   * from the context's configuration.
+   *
+   * @throws HCatException with {@link ErrorType#ERROR_NOT_INITIALIZED} if the
+   *           abort fails
+   */
+  @Override
+  public void abort(WriterContext context) throws HCatException {
+    try {
+      new HCatOutputFormat().getOutputCommitter(ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(
+        context.getConf(), ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptID()))
+        .abortJob(ShimLoader.getHadoopShims().getHCatShim().createJobContext(
+          context.getConf(), null), State.FAILED);
+    } catch (IOException e) {
+      throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e);
+    } catch (InterruptedException e) {
+      throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e);
+    }
+  }
+
+  private void setVarsInConf(int id) {
+
+    // Following two config keys are required by FileOutputFormat to work
+    // correctly.
+    // In the usual case of Hadoop, the JobTracker will set these before
+    // launching tasks.
+    // Since there is no JobTracker here, we set them ourselves.
+    conf.setInt("mapred.task.partition", id);
+    conf.set("mapred.task.id", "attempt__0000_r_000000_" + id);
+  }
+}
diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/state/DefaultStateProvider.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/state/DefaultStateProvider.java
new file mode 100644
index 0000000..9f5a5dc
--- /dev/null
+++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/state/DefaultStateProvider.java
@@ -0,0 +1,48 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.
The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hive.hcatalog.data.transfer.state;
+
+import java.text.NumberFormat;
+import java.util.Random;
+
+/**
+ * Default {@link StateProvider}: hands out randomly generated ids. Use
+ * {@link #get()} to obtain the shared instance.
+ */
+public class DefaultStateProvider implements StateProvider {
+
+  // One generator per provider; java.util.Random may be shared between threads.
+  private final Random random = new Random();
+
+  /**
+   * Default implementation. Here, ids are generated randomly.
+   *
+   * @return a random id that is never negative. Drawing from
+   *         [0, Integer.MAX_VALUE) avoids Math.abs(Integer.MIN_VALUE), which
+   *         is itself negative.
+   */
+  @Override
+  public int getId() {
+    return random.nextInt(Integer.MAX_VALUE);
+  }
+
+  private static StateProvider sp;
+
+  /**
+   * @return the shared provider, created on first use
+   */
+  public static synchronized StateProvider get() {
+    if (null == sp) {
+      sp = new DefaultStateProvider();
+    }
+    return sp;
+  }
+}
diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/state/StateProvider.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/state/StateProvider.java
new file mode 100644
index 0000000..767cd90
--- /dev/null
+++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/data/transfer/state/StateProvider.java
@@ -0,0 +1,35 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.
The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hive.hcatalog.data.transfer.state;
+
+/**
+ * If an external system wants to communicate any state to slaves, it can do
+ * so via this interface. One example of this, in the case of Map-Reduce, is
+ * the ids assigned by the JobTracker to TaskTrackers.
+ */
+public interface StateProvider {
+
+  /**
+   * This method should return the id assigned to the slave node.
+   *
+   * @return id
+   */
+  public int getId();
+}
diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/har/HarOutputCommitterPostProcessor.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/har/HarOutputCommitterPostProcessor.java
new file mode 100644
index 0000000..4086cbe
--- /dev/null
+++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/har/HarOutputCommitterPostProcessor.java
@@ -0,0 +1,125 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hive.hcatalog.har;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
+import org.apache.hadoop.hive.metastore.api.Partition;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.tools.HadoopArchives;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.hive.hcatalog.common.HCatConstants;
+import org.apache.hive.hcatalog.common.HCatException;
+
+/**
+ * Post-processing step that packs a partition directory into a Hadoop archive
+ * (HAR) and marks the partition as archived. The enabled flag is only stored
+ * and reported here; nothing in this class checks it.
+ */
+public class HarOutputCommitterPostProcessor {
+
+  boolean isEnabled = false;
+
+  public boolean isEnabled() {
+    return isEnabled;
+  }
+
+  public void setEnabled(boolean enabled) {
+    this.isEnabled = enabled;
+  }
+
+
+  /**
+   * Archives the partition directory into the file named by
+   * {@link #harFile(Path)} and then sets the partition parameter
+   * IS_ARCHIVED to "true".
+   *
+   * @param context job context, passed through to makeHar
+   * @param partition partition whose parameters are updated
+   * @param partPath directory of the partition
+   * @throws IOException if creating the archive fails
+   */
+  public void exec(JobContext context, Partition partition, Path partPath) throws IOException {
+//    LOG.info("Archiving partition ["+partPath.toString()+"]");
+    makeHar(context, partPath.toUri().toString(), harFile(partPath));
+    partition.getParameters().put(hive_metastoreConstants.IS_ARCHIVED, "true");
+  }
+
+  /**
+   * @return the partition path as a string, with trailing slashes removed and
+   *         ".har" appended
+   */
+  public String harFile(Path ptnPath) throws IOException {
+    String harFile = ptnPath.toString().replaceFirst("/+$", "") + ".har";
+//    LOG.info("har file : " + harFile);
+    return harFile;
+  }
+
+  /**
+   * @return the path component of the partition URI with trailing slashes
+   *         removed
+   */
+  public String getParentFSPath(Path ptnPath) throws IOException {
+    return ptnPath.toUri().getPath().replaceFirst("/+$", "");
+  }
+
+  /**
+   * @return "har://" followed by the path component of the partition URI
+   *         (trailing slashes removed), ".har" and a path separator
+   */
+  public String getProcessedLocation(Path ptnPath) throws IOException {
+    String harLocn = ("har://" + ptnPath.toUri().getPath()).replaceFirst("/+$", "") + ".har" + Path.SEPARATOR;
+//    LOG.info("har location : " + harLocn);
+    return harLocn;
+  }
+
+
+  /**
+   * Creates a har file from the contents of a given directory, using that as root.
+   * After the archive tool reports success, the source directory is deleted
+   * recursively.
+   *
+   * NOTE: harFile is split at its last path separator before the try block,
+   * so a harFile without any separator fails with a
+   * StringIndexOutOfBoundsException that is not wrapped.
+   *
+   * @param context not used by the active code (only by commented-out lines)
+   * @param dir Directory to archive
+   * @param harFile The HAR file to create
+   * @throws IOException an {@link HCatException} wrapping any failure raised
+   *           inside the try block, including a non-zero exit code of the
+   *           archive tool
+   */
+  public static void makeHar(JobContext context, String dir, String harFile) throws IOException {
+//    Configuration conf = context.getConfiguration();
+//    Credentials creds = context.getCredentials();
+
+//    HCatUtil.logAllTokens(LOG,context);
+
+    // Parent directory of harFile receives the archive; the last path segment is its name.
+    int lastSep = harFile.lastIndexOf(Path.SEPARATOR_CHAR);
+    Path archivePath = new Path(harFile.substring(0, lastSep));
+    final String[] args = {
+      "-archiveName",
+      harFile.substring(lastSep + 1, harFile.length()),
+      "-p",
+      dir,
+      "*",
+      archivePath.toString()
+    };
+//    for (String arg : args){
+//      LOG.info("Args to har : "+ arg);
+//    }
+    try {
+      Configuration newConf = new Configuration();
+      FileSystem fs = archivePath.getFileSystem(newConf);
+
+      // Hand the token file named by the environment, if any, to the archive job.
+      String hadoopTokenFileLocationEnvSetting = System.getenv(HCatConstants.SYSENV_HADOOP_TOKEN_FILE_LOCATION);
+      if ((hadoopTokenFileLocationEnvSetting != null) && (!hadoopTokenFileLocationEnvSetting.isEmpty())) {
+        newConf.set(HCatConstants.CONF_MAPREDUCE_JOB_CREDENTIALS_BINARY, hadoopTokenFileLocationEnvSetting);
+//      LOG.info("System.getenv(\"HADOOP_TOKEN_FILE_LOCATION\") =["+ System.getenv("HADOOP_TOKEN_FILE_LOCATION")+"]");
+      }
+//      for (FileStatus ds : fs.globStatus(new Path(dir, "*"))){
+//        LOG.info("src : "+ds.getPath().toUri().toString());
+//      }
+
+      final HadoopArchives har = new HadoopArchives(newConf);
+      int rc = ToolRunner.run(har, args);
+      if (rc != 0) {
+        throw new Exception("Har returned error code " + rc);
+      }
+
+//      for (FileStatus hs : fs.globStatus(new Path(harFile, "*"))){
+//        LOG.info("dest : "+hs.getPath().toUri().toString());
+//      }
+//      doHarCheck(fs,harFile);
+//      LOG.info("Nuking " + dir);
+      // Only reached when the archive tool returned 0: remove the archived source.
+      fs.delete(new Path(dir), true);
+    } catch (Exception e) {
+      throw new HCatException("Error creating Har [" + harFile + "] from [" + dir + "]", e);
+    }
+  }
+
+}
diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/DefaultOutputCommitterContainer.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/DefaultOutputCommitterContainer.java
new file mode 100644
index 0000000..a11affc
--- /dev/null
+++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/DefaultOutputCommitterContainer.java
@@ -0,0 +1,107 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hive.hcatalog.mapreduce;
+
+import java.io.IOException;
+
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.JobStatus.State;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hive.hcatalog.common.HCatConstants;
+import org.apache.hive.hcatalog.common.HCatUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Part of the DefaultOutput*Container classes.
+ * See {@link DefaultOutputFormatContainer} for more information.
+ *
+ * Every callback converts the incoming context with HCatMapRedUtil and
+ * forwards the call to the wrapped base committer. Job completion (commit or
+ * abort) additionally runs {@link #cleanupJob(JobContext)}.
+ */
+class DefaultOutputCommitterContainer extends OutputCommitterContainer {
+
+  private static final Logger LOG = LoggerFactory.getLogger(DefaultOutputCommitterContainer.class);
+
+  /**
+   * Passes both arguments straight to the superclass constructor.
+   *
+   * @param context current JobContext
+   * @param baseCommitter OutputCommitter to contain
+   * @throws IOException
+   */
+  public DefaultOutputCommitterContainer(JobContext context, org.apache.hadoop.mapred.OutputCommitter baseCommitter) throws IOException {
+    super(context, baseCommitter);
+  }
+
+  /** Forwards the task abort to the base committer. */
+  @Override
+  public void abortTask(TaskAttemptContext context) throws IOException {
+    getBaseOutputCommitter().abortTask(
+      HCatMapRedUtil.createTaskAttemptContext(context));
+  }
+
+  /** Forwards the task commit to the base committer. */
+  @Override
+  public void commitTask(TaskAttemptContext context) throws IOException {
+    getBaseOutputCommitter().commitTask(
+      HCatMapRedUtil.createTaskAttemptContext(context));
+  }
+
+  /** @return whatever the base committer answers for this task attempt */
+  @Override
+  public boolean needsTaskCommit(TaskAttemptContext context) throws IOException {
+    return getBaseOutputCommitter().needsTaskCommit(
+      HCatMapRedUtil.createTaskAttemptContext(context));
+  }
+
+  /** Forwards job setup to the base committer. */
+  @Override
+  public void setupJob(JobContext context) throws IOException {
+    getBaseOutputCommitter().setupJob(
+      HCatMapRedUtil.createJobContext(context));
+  }
+
+  /** Forwards task setup to the base committer. */
+  @Override
+  public void setupTask(TaskAttemptContext context) throws IOException {
+    getBaseOutputCommitter().setupTask(
+      HCatMapRedUtil.createTaskAttemptContext(context));
+  }
+
+  /**
+   * Aborts the job on the base committer, then runs {@link #cleanupJob(JobContext)}.
+   * The cleanup is not reached if the base committer throws.
+   */
+  @Override
+  public void abortJob(JobContext jobContext, State state) throws IOException {
+    getBaseOutputCommitter().abortJob(
+      HCatMapRedUtil.createJobContext(jobContext), state);
+    cleanupJob(jobContext);
+  }
+
+  /**
+   * Commits the job on the base committer, then runs {@link #cleanupJob(JobContext)}.
+   * The cleanup is not reached if the base committer throws.
+   */
+  @Override
+  public void commitJob(JobContext jobContext) throws IOException {
+    getBaseOutputCommitter().commitJob(
+      HCatMapRedUtil.createJobContext(jobContext));
+    cleanupJob(jobContext);
+  }
+
+  /**
+   * Runs the base committer's cleanup and then makes a best-effort attempt to
+   * cancel the metastore delegation token. Any failure during cancellation is
+   * logged at WARN level and swallowed; the metastore client is always closed.
+   */
+  @Override
+  public void cleanupJob(JobContext context) throws IOException {
+    getBaseOutputCommitter().cleanupJob(
+      HCatMapRedUtil.createJobContext(context));
+
+    //Cancel HCat and JobTracker tokens
+    HiveMetaStoreClient metastore = null;
+    try {
+      HiveConf hiveConf = HCatUtil.getHiveConf(context.getConfiguration());
+      metastore = HCatUtil.getHiveClient(hiveConf);
+      String token = metastore.getTokenStrForm();
+      if (token != null) {
+        // Only cancel when a token signature was recorded in the job configuration.
+        String signature = context.getConfiguration().get(HCatConstants.HCAT_KEY_TOKEN_SIGNATURE);
+        if (signature != null) {
+          metastore.cancelDelegationToken(token);
+        }
+      }
+    } catch (Exception e) {
+      LOG.warn("Failed to cancel delegation token", e);
+    } finally {
+      HCatUtil.closeHiveClientQuietly(metastore);
+    }
+  }
+}
diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/DefaultOutputFormatContainer.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/DefaultOutputFormatContainer.java
new file mode 100644
index 0000000..c641a82
--- /dev/null
+++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/DefaultOutputFormatContainer.java
@@ -0,0 +1,101 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hive.hcatalog.mapreduce;
+
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
+
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.OutputCommitter;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hive.hcatalog.common.HCatUtil;
+import org.apache.hive.hcatalog.data.HCatRecord;
+
+import java.io.IOException;
+import java.text.NumberFormat;
+
+/**
+ * Bare bones implementation of OutputFormatContainer. Does only the required
+ * tasks to work properly with HCatalog. HCatalog features which require a
+ * storage specific implementation are unsupported (ie partitioning).
+ */
+class DefaultOutputFormatContainer extends OutputFormatContainer {
+
+  // Shared formatter for part-file names: at least five digits, no grouping separators.
+  private static final NumberFormat NUMBER_FORMAT = NumberFormat.getInstance();
+
+  static {
+    NUMBER_FORMAT.setMinimumIntegerDigits(5);
+    NUMBER_FORMAT.setGroupingUsed(false);
+  }
+
+  /**
+   * @param of the mapred OutputFormat to wrap; handed to the superclass unchanged
+   */
+  public DefaultOutputFormatContainer(org.apache.hadoop.mapred.OutputFormat<WritableComparable<?>, Writable> of) {
+    super(of);
+  }
+
+  /**
+   * Builds the output file name for a partition number, e.g. {@code part-00003}.
+   * Synchronized because all callers share the single static formatter.
+   * @param partition the number to embed in the name
+   * @return "part-" followed by the formatted number
+   */
+  static synchronized String getOutputName(int partition) {
+    return "part-" + NUMBER_FORMAT.format(partition);
+  }
+
+  /**
+   * Get the record writer for the job. Uses the storagehandler's OutputFormat
+   * to get the record writer.
+   * @param context the information about the current task.
+   * @return a RecordWriter to write the output for the job.
+   * @throws IOException
+   */
+  @Override
+  public RecordWriter<WritableComparable<?>, HCatRecord>
+  getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException {
+    // The output name is derived from the numeric id of the task that owns this attempt.
+    String name = getOutputName(context.getTaskAttemptID().getTaskID().getId());
+    // The base format is called with a null first argument and a fresh JobConf copy of the task configuration.
+    return new DefaultRecordWriterContainer(context,
+      getBaseOutputFormat().getRecordWriter(null, new JobConf(context.getConfiguration()), name, InternalUtil.createReporter(context)));
+  }
+
+
+  /**
+   * Get the output committer for this output format. This is responsible
+   * for ensuring the output is committed correctly.
+   * @param context the task context
+   * @return an output committer
+   * @throws IOException
+   * @throws InterruptedException
+   */
+  @Override
+  public OutputCommitter getOutputCommitter(TaskAttemptContext context)
+    throws IOException, InterruptedException {
+    return new DefaultOutputCommitterContainer(context, new JobConf(context.getConfiguration()).getOutputCommitter());
+  }
+
+  /**
+   * Check for validity of the output-specification for the job.
+   * @param context information about the job
+   * @throws IOException when output should not be attempted
+   */
+  @Override
+  public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException {
+    org.apache.hadoop.mapred.OutputFormat<? super WritableComparable<?>, ? super Writable> outputFormat = getBaseOutputFormat();
+    JobConf jobConf = new JobConf(context.getConfiguration());
+    outputFormat.checkOutputSpecs(null, jobConf);
+    // The check ran against a JobConf copy, so copy it back into the job configuration afterwards.
+    HCatUtil.copyConf(jobConf, context.getConfiguration());
+  }
+
+}
diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/DefaultRecordWriterContainer.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/DefaultRecordWriterContainer.java
new file mode 100644
index 0000000..96587d4
--- /dev/null
+++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/DefaultRecordWriterContainer.java
@@ -0,0 +1,82 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hive.hcatalog.mapreduce;
+
+import java.io.IOException;
+
+import org.apache.hadoop.hive.serde2.SerDe;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.util.ReflectionUtils;
+import org.apache.hive.hcatalog.common.HCatUtil;
+import org.apache.hive.hcatalog.data.HCatRecord;
+
+/**
+ * Part of the DefaultOutput*Container classes
+ * See {@link DefaultOutputFormatContainer} for more information
+ */
+class DefaultRecordWriterContainer extends RecordWriterContainer {
+
+  private final HCatStorageHandler storageHandler;
+  private final SerDe serDe;
+  private final OutputJobInfo jobInfo;
+  private final ObjectInspector hcatRecordOI;
+
+  /**
+   * Resolves the storage handler for the job's table, instantiates its SerDe
+   * and initializes that SerDe for output.
+   *
+   * @param context current task attempt context
+   * @param baseRecordWriter RecordWriter to contain
+   * @throws IOException if the SerDe cannot be initialized (wraps the SerDeException)
+   * @throws InterruptedException
+   */
+  public DefaultRecordWriterContainer(TaskAttemptContext context,
+                    org.apache.hadoop.mapred.RecordWriter<? super WritableComparable<?>, ? super Writable> baseRecordWriter) throws IOException, InterruptedException {
+    super(context, baseRecordWriter);
+    jobInfo = HCatOutputFormat.getJobInfo(context);
+    storageHandler = HCatUtil.getStorageHandler(context.getConfiguration(), jobInfo.getTableInfo().getStorerInfo());
+    HCatOutputFormat.configureOutputStorageHandler(context);
+    serDe = ReflectionUtils.newInstance(storageHandler.getSerDeClass(), context.getConfiguration());
+    hcatRecordOI = InternalUtil.createStructObjectInspector(jobInfo.getOutputSchema());
+    try {
+      InternalUtil.initializeOutputSerDe(serDe, context.getConfiguration(), jobInfo);
+    } catch (SerDeException e) {
+      throw new IOException("Failed to initialize SerDe", e);
+    }
+  }
+
+  /** Closes the wrapped writer, passing it a reporter created from the task context. */
+  @Override
+  public void close(TaskAttemptContext context) throws IOException,
+    InterruptedException {
+    getBaseRecordWriter().close(InternalUtil.createReporter(context));
+  }
+
+  /**
+   * Serializes the record with the storage handler's SerDe and writes it to
+   * the wrapped writer. The key argument is not used: the wrapped writer is
+   * always called with a null key.
+   *
+   * @throws IOException if serialization fails (wraps the SerDeException)
+   */
+  @Override
+  public void write(WritableComparable<?> key, HCatRecord value) throws IOException,
+    InterruptedException {
+    try {
+      getBaseRecordWriter().write(null, serDe.serialize(value.getAll(), hcatRecordOI));
+    } catch (SerDeException e) {
+      throw new IOException("Failed to serialize object", e);
+    }
+  }
+
+}
diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputCommitterContainer.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputCommitterContainer.java
new file mode 100644
index 0000000..a843a5b
--- /dev/null
+++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputCommitterContainer.java
@@ -0,0 +1,750 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hive.hcatalog.mapreduce;
+
+import java.io.IOException;
+import java.net.URI;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.hive.common.FileUtils;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
+import org.apache.hadoop.hive.metastore.Warehouse;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.InvalidOperationException;
+import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.hive.metastore.api.Partition;
+import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.shims.ShimLoader;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.JobStatus.State;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hive.hcatalog.common.ErrorType;
+import org.apache.hive.hcatalog.common.HCatConstants;
+import org.apache.hive.hcatalog.common.HCatException;
+import org.apache.hive.hcatalog.common.HCatUtil;
+import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
+import org.apache.hive.hcatalog.data.schema.HCatSchema;
+import org.apache.hive.hcatalog.data.schema.HCatSchemaUtils;
+import org.apache.hive.hcatalog.har.HarOutputCommitterPostProcessor;
+import org.apache.thrift.TException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Part of the FileOutput*Container classes
+ * See {@link FileOutputFormatContainer} for more information
+ */
+class FileOutputCommitterContainer extends OutputCommitterContainer {
+
+  // Entries with these names are skipped when task outputs are moved.
+  private static final String TEMP_DIR_NAME = "_temporary";
+  private static final String LOGS_DIR_NAME = "_logs";
+
+  private static final Logger LOG = LoggerFactory.getLogger(FileOutputCommitterContainer.class);
+  private final boolean dynamicPartitioningUsed;
+  // Starts out true when dynamic partitioning is off; otherwise set by discoverPartitions().
+  private boolean partitionsDiscovered;
+
+  // Both maps are keyed by the discovered partition path (as a string) and are
+  // populated by discoverPartitions().
+  private Map<String, Map<String, String>> partitionsDiscoveredByPath;
+  private Map<String, JobContext> contextDiscoveredByPath;
+  private final HCatStorageHandler cachedStorageHandler;
+
+  HarOutputCommitterPostProcessor harProcessor = new HarOutputCommitterPostProcessor();
+
+  // Cached result of getPartitionRootLocation(); computed on first use.
+  private String ptnRootLocation = null;
+
+  private OutputJobInfo jobInfo = null;
+
+  /**
+   * @param context current JobContext
+   * @param baseCommitter OutputCommitter to contain
+   * @throws IOException
+   */
+  public FileOutputCommitterContainer(JobContext context,
+                    org.apache.hadoop.mapred.OutputCommitter baseCommitter) throws IOException {
+    super(context, baseCommitter);
+    jobInfo = HCatOutputFormat.getJobInfo(context);
+    dynamicPartitioningUsed = jobInfo.isDynamicPartitioningUsed();
+
+    this.partitionsDiscovered = !dynamicPartitioningUsed;
+    cachedStorageHandler = HCatUtil.getStorageHandler(context.getConfiguration(), jobInfo.getTableInfo().getStorerInfo());
+  }
+
+  /** Forwards to the base committer; does nothing when dynamic partitioning is used. */
+  @Override
+  public void abortTask(TaskAttemptContext context) throws IOException {
+    if (!dynamicPartitioningUsed) {
+      getBaseOutputCommitter().abortTask(HCatMapRedUtil.createTaskAttemptContext(context));
+    }
+  }
+
+  /** Forwards to the base committer; does nothing when dynamic partitioning is used. */
+  @Override
+  public void commitTask(TaskAttemptContext context) throws IOException {
+    if (!dynamicPartitioningUsed) {
+      //See HCATALOG-499
+      FileOutputFormatContainer.setWorkOutputPath(context);
+      getBaseOutputCommitter().commitTask(HCatMapRedUtil.createTaskAttemptContext(context));
+    }
+  }
+
+  /** @return the base committer's answer, or false when dynamic partitioning is used */
+  @Override
+  public boolean needsTaskCommit(TaskAttemptContext context) throws IOException {
+    if (!dynamicPartitioningUsed) {
+      return getBaseOutputCommitter().needsTaskCommit(HCatMapRedUtil.createTaskAttemptContext(context));
+    } else {
+      // called explicitly through FileRecordWriterContainer.close() if dynamic - return false by default
+      return false;
+    }
+  }
+
+  /** Forwards to the base committer when one is present and dynamic partitioning is off. */
+  @Override
+  public void setupJob(JobContext context) throws IOException {
+    if (getBaseOutputCommitter() != null && !dynamicPartitioningUsed) {
+      getBaseOutputCommitter().setupJob(HCatMapRedUtil.createJobContext(context));
+    }
+    // in dynamic usecase, called through FileRecordWriterContainer
+  }
+
+  /** Forwards to the base committer; does nothing when dynamic partitioning is used. */
+  @Override
+  public void setupTask(TaskAttemptContext context) throws IOException {
+    if (!dynamicPartitioningUsed) {
+      getBaseOutputCommitter().setupTask(HCatMapRedUtil.createTaskAttemptContext(context));
+    }
+  }
+
+  /**
+   * Aborts the job: on the base committer in the static case, or on the
+   * committer of every discovered partition context in the dynamic case.
+   * Afterwards the job's output location (the partition root when dynamic) is
+   * deleted recursively. Delegation tokens are cancelled in all cases.
+   */
+  @Override
+  public void abortJob(JobContext jobContext, State state) throws IOException {
+    try {
+      if (dynamicPartitioningUsed) {
+        discoverPartitions(jobContext);
+      }
+      org.apache.hadoop.mapred.JobContext mapRedJobContext = HCatMapRedUtil
+        .createJobContext(jobContext);
+      if (getBaseOutputCommitter() != null && !dynamicPartitioningUsed) {
+        getBaseOutputCommitter().abortJob(mapRedJobContext, state);
+      } else if (dynamicPartitioningUsed) {
+        for (JobContext currContext : contextDiscoveredByPath.values()) {
+          try {
+            new JobConf(currContext.getConfiguration())
+              .getOutputCommitter().abortJob(currContext,
+              state);
+          } catch (Exception e) {
+            throw new IOException(e);
+          }
+        }
+      }
+      Path src;
+      OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(jobContext);
+      if (dynamicPartitioningUsed) {
+        src = new Path(getPartitionRootLocation(jobInfo.getLocation(), jobInfo.getTableInfo().getTable()
+          .getPartitionKeysSize()));
+      } else {
+        src = new Path(jobInfo.getLocation());
+      }
+      FileSystem fs = src.getFileSystem(jobContext.getConfiguration());
+      // Note fs.delete will fail on Windows. The reason is in OutputCommitter,
+      // Hadoop is still writing to _logs/history. On Linux, OS don't care file is still
+      // open and remove the directory anyway, but on Windows, OS refuse to remove a
+      // directory containing open files. So on Windows, we will leave output directory
+      // behind when job fail. User needs to remove the output directory manually
+      LOG.info("Job failed. Try cleaning up temporary directory [{}].", src);
+      fs.delete(src, true);
+    } finally {
+      cancelDelegationTokens(jobContext);
+    }
+  }
+
+  public static final String SUCCEEDED_FILE_NAME = "_SUCCESS";
+  static final String SUCCESSFUL_JOB_OUTPUT_DIR_MARKER =
+    "mapreduce.fileoutputcommitter.marksuccessfuljobs";
+
+  /** @return the boolean value of SUCCESSFUL_JOB_OUTPUT_DIR_MARKER, false when unset */
+  private static boolean getOutputDirMarking(Configuration conf) {
+    return conf.getBoolean(SUCCESSFUL_JOB_OUTPUT_DIR_MARKER,
+      false);
+  }
+
+  /**
+   * Commits the job output, registers the resulting partitions and, when
+   * requested by configuration, creates the _SUCCESS marker file in the job's
+   * output location. Delegation tokens are cancelled in all cases.
+   */
+  @Override
+  public void commitJob(JobContext jobContext) throws IOException {
+    try {
+      if (dynamicPartitioningUsed) {
+        discoverPartitions(jobContext);
+        // Commit each partition so it gets moved out of the job work
+        // dir
+        for (JobContext context : contextDiscoveredByPath.values()) {
+          new JobConf(context.getConfiguration())
+            .getOutputCommitter().commitJob(context);
+        }
+      }
+      if (getBaseOutputCommitter() != null && !dynamicPartitioningUsed) {
+        getBaseOutputCommitter().commitJob(
+          HCatMapRedUtil.createJobContext(jobContext));
+      }
+      registerPartitions(jobContext);
+      // create _SUCCESS FILE if so requested.
+      OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(jobContext);
+      if (getOutputDirMarking(jobContext.getConfiguration())) {
+        Path outputPath = new Path(jobInfo.getLocation());
+        FileSystem fileSys = outputPath.getFileSystem(jobContext
+          .getConfiguration());
+        // create a file in the folder to mark it
+        if (fileSys.exists(outputPath)) {
+          Path filePath = new Path(outputPath,
+            SUCCEEDED_FILE_NAME);
+          if (!fileSys.exists(filePath)) { // may have been
+            // created by
+            // baseCommitter.commitJob()
+            fileSys.create(filePath).close();
+          }
+        }
+      }
+    } finally {
+      cancelDelegationTokens(jobContext);
+    }
+  }
+
+  /** Always throws: this container does not support the deprecated cleanupJob callback. */
+  @Override
+  public void cleanupJob(JobContext context) throws IOException {
+    throw new IOException("The method cleanupJob is deprecated and should not be called.");
+  }
+
+  /**
+   * Walks numPtnKeys parents up from ptnLocn and caches the result. Later
+   * calls return the cached value and ignore their arguments.
+   */
+  private String getPartitionRootLocation(String ptnLocn, int numPtnKeys) {
+    if (ptnRootLocation == null) {
+      // we only need to calculate it once, it'll be the same for other partitions in this job.
+      Path ptnRoot = new Path(ptnLocn);
+      for (int i = 0; i < numPtnKeys; i++) {
+//          LOG.info("Getting parent of "+ptnRoot.getName());
+        ptnRoot = ptnRoot.getParent();
+      }
+      ptnRootLocation = ptnRoot.toString();
+    }
+//        LOG.info("Returning final parent : "+ptnRootLocation);
+    return ptnRootLocation;
+  }
+
+  /**
+   * Generate partition metadata object to be used to add to metadata.
+   * @param context The job context.
+   * @param jobInfo The OutputJobInfo.
+   * @param partLocnRoot The table-equivalent location root of the partition
+   *                     (temporary dir if dynamic partition, table dir if static)
+   * @param partKVs The keyvalue pairs that form the partition
+   * @param outputSchema The output schema for the partition
+   * @param params The parameters to store inside the partition
+   * @param table The Table metadata object under which this Partition will reside
+   * @param fs FileSystem object to operate on the underlying filesystem
+   * @param grpName Group name that owns the table dir
+   * @param perms FsPermission that's the default permission of the table dir.
+   * @return Constructed Partition metadata object
+   * @throws java.io.IOException
+   */
+
+  private Partition constructPartition(
+    JobContext context, OutputJobInfo jobInfo,
+    String partLocnRoot, Map<String, String> partKVs,
+    HCatSchema outputSchema, Map<String, String> params,
+    Table table, FileSystem fs,
+    String grpName, FsPermission perms) throws IOException {
+
+    Partition partition = new Partition();
+    partition.setDbName(table.getDbName());
+    partition.setTableName(table.getTableName());
+    // Start from a copy of the table's storage descriptor; columns and location are overridden below.
+    partition.setSd(new StorageDescriptor(table.getTTable().getSd()));
+
+    List<FieldSchema> fields = new ArrayList<FieldSchema>();
+    for (HCatFieldSchema fieldSchema : outputSchema.getFields()) {
+      fields.add(HCatSchemaUtils.getFieldSchema(fieldSchema));
+    }
+
+    partition.getSd().setCols(fields);
+
+    partition.setValues(FileOutputFormatContainer.getPartitionValueList(table, partKVs));
+
+    partition.setParameters(params);
+
+    // Sets permissions and group name on partition dirs and files.
+
+    Path partPath;
+    if (Boolean.valueOf((String)table.getProperty("EXTERNAL"))
+      && jobInfo.getLocation() != null && jobInfo.getLocation().length() > 0) {
+      // honor external table that specifies the location
+      partPath = new Path(jobInfo.getLocation());
+    } else {
+      partPath = new Path(partLocnRoot);
+      int i = 0;
+      for (FieldSchema partKey : table.getPartitionKeys()) {
+        // Permissions go on each intermediate key directory; the root itself
+        // (first iteration) is skipped and the leaf is handled after the loop.
+        if (i++ != 0) {
+          applyGroupAndPerms(fs, partPath, perms, grpName, false);
+        }
+        partPath = constructPartialPartPath(partPath, partKey.getName().toLowerCase(), partKVs);
+      }
+    }
+
+    // Apply the group and permissions to the leaf partition and files.
+    // Need not bother in case of HDFS as permission is taken care of by setting UMask
+    if (!ShimLoader.getHadoopShims().getHCatShim().isFileInHDFS(fs, partPath)) {
+      applyGroupAndPerms(fs, partPath, perms, grpName, true);
+    }
+
+    // Set the location in the StorageDescriptor
+    if (dynamicPartitioningUsed) {
+      String dynamicPartitionDestination = getFinalDynamicPartitionDestination(table, partKVs);
+      if (harProcessor.isEnabled()) {
+        harProcessor.exec(context, partition, partPath);
+        partition.getSd().setLocation(
+          harProcessor.getProcessedLocation(new Path(dynamicPartitionDestination)));
+      } else {
+        partition.getSd().setLocation(dynamicPartitionDestination);
+      }
+    } else {
+      partition.getSd().setLocation(partPath.toString());
+    }
+    return partition;
+  }
+
+  /**
+   * Sets the given permission on dir and, when recursive, on everything below it.
+   * NOTE(review): the group argument is only passed along on recursion; this
+   * method never applies it to the filesystem. Confirm whether that is intended.
+   *
+   * @param fs filesystem holding dir
+   * @param dir directory to update
+   * @param permission permission to set
+   * @param group group name (currently unused, see note)
+   * @param recursive whether to descend into dir
+   * @throws IOException if the filesystem rejects an operation
+   */
+  private void applyGroupAndPerms(FileSystem fs, Path dir, FsPermission permission,
+                  String group, boolean recursive)
+    throws IOException {
+    fs.setPermission(dir, permission);
+    if (recursive) {
+      for (FileStatus fileStatus : fs.listStatus(dir)) {
+        if (fileStatus.isDir()) {
+          applyGroupAndPerms(fs, fileStatus.getPath(), permission, group, true);
+        } else {
+          fs.setPermission(fileStatus.getPath(), permission);
+        }
+      }
+    }
+  }
+
+  /**
+   * Builds the final location of a dynamic partition by appending one
+   * key=value component per partition key to the table location.
+   */
+  private String getFinalDynamicPartitionDestination(Table table, Map<String, String> partKVs) {
+    // file:///tmp/hcat_junit_warehouse/employee/_DYN0.7770480401313761/emp_country=IN/emp_state=KA ->
+    // file:///tmp/hcat_junit_warehouse/employee/emp_country=IN/emp_state=KA
+    Path partPath = new Path(table.getTTable().getSd().getLocation());
+    for (FieldSchema partKey : table.getPartitionKeys()) {
+      partPath = constructPartialPartPath(partPath, partKey.getName().toLowerCase(), partKVs);
+    }
+    return partPath.toString();
+  }
+
+  /**
+   * Copies every property of the storer into a new String-to-String map,
+   * converting keys and values with toString().
+   */
+  private Map<String, String> getStorerParameterMap(StorerInfo storer) {
+    Map<String, String> params = new HashMap<String, String>();
+
+    //Copy table level hcat.* keys to the partition
+    for (Entry<Object, Object> entry : storer.getProperties().entrySet()) {
+      params.put(entry.getKey().toString(), entry.getValue().toString());
+    }
+    return params;
+  }
+
+  /**
+   * Appends an escaped key=value component to partialPath, looking the value
+   * up in partKVs under partKey.
+   */
+  private Path constructPartialPartPath(Path partialPath, String partKey, Map<String, String> partKVs) {
+
+    StringBuilder sb = new StringBuilder(FileUtils.escapePathName(partKey));
+    sb.append("=");
+    sb.append(FileUtils.escapePathName(partKVs.get(partKey)));
+    return new Path(partialPath, sb.toString());
+  }
+
+  /**
+   * Update table schema, adding new columns as added for the partition.
+   * @param client the client
+   * @param table the table
+   * @param partitionSchema the schema of the partition
+   * @throws java.io.IOException Signals that an I/O exception has occurred.
+   * @throws org.apache.hadoop.hive.metastore.api.InvalidOperationException the invalid operation exception
+   * @throws org.apache.hadoop.hive.metastore.api.MetaException the meta exception
+   * @throws org.apache.thrift.TException the t exception
+   */
+  private void updateTableSchema(HiveMetaStoreClient client, Table table,
+                   HCatSchema partitionSchema) throws IOException, InvalidOperationException, MetaException, TException {
+
+
+    List<FieldSchema> newColumns = HCatUtil.validatePartitionSchema(table, partitionSchema);
+
+    // The metastore is only contacted when validation reports new columns.
+    if (newColumns.size() != 0) {
+      List<FieldSchema> tableColumns = new ArrayList<FieldSchema>(table.getTTable().getSd().getCols());
+      tableColumns.addAll(newColumns);
+
+      //Update table schema to add the newly added columns
+      table.getTTable().getSd().setCols(tableColumns);
+      client.alter_table(table.getDbName(), table.getTableName(), table.getTTable());
+    }
+  }
+
+  /**
+   * Move all of the files from the temp directory to the final location.
+   * Entries named _temporary, _logs or _SUCCESS are skipped. Directories are
+   * either recursed into or, for a dynamic-partition leaf directory, renamed
+   * as a whole.
+   * @param fs the output file system
+   * @param file the file to move
+   * @param srcDir the source directory
+   * @param destDir the target directory
+   * @param dryRun - a flag that simply tests if this move would succeed or not based
+   *                 on whether other files exist where we're trying to copy
+   * @throws java.io.IOException HCatException with ERROR_MOVE_FAILED when the
+   *                 target already exists (dry run) or a delete/rename fails
+   */
+  private void moveTaskOutputs(FileSystem fs,
+                 Path file,
+                 Path srcDir,
+                 Path destDir, final boolean dryRun) throws IOException {
+
+    if (file.getName().equals(TEMP_DIR_NAME) || file.getName().equals(LOGS_DIR_NAME) || file.getName().equals(SUCCEEDED_FILE_NAME)) {
+      return;
+    }
+    final Path finalOutputPath = getFinalPath(file, srcDir, destDir);
+    if (fs.isFile(file)) {
+      if (dryRun){
+        if(LOG.isDebugEnabled()) {
+          LOG.debug("Testing if moving file: [" + file + "] to ["
+            + finalOutputPath + "] would cause a problem");
+        }
+        if (fs.exists(finalOutputPath)) {
+          throw new HCatException(ErrorType.ERROR_MOVE_FAILED, "Data already exists in " + finalOutputPath
+            + ", duplicate publish not possible.");
+        }
+      } else {
+        if(LOG.isDebugEnabled()) {
+          LOG.debug("Moving file: [ " + file + "] to [" + finalOutputPath + "]");
+        }
+        // Make sure the parent directory exists. It is not an error
+        // to recreate an existing directory
+        fs.mkdirs(finalOutputPath.getParent());
+        // If the first rename fails, delete whatever is at the target and retry once.
+        if (!fs.rename(file, finalOutputPath)) {
+          if (!fs.delete(finalOutputPath, true)) {
+            throw new HCatException(ErrorType.ERROR_MOVE_FAILED, "Failed to delete existing path " + finalOutputPath);
+          }
+          if (!fs.rename(file, finalOutputPath)) {
+            throw new HCatException(ErrorType.ERROR_MOVE_FAILED, "Failed to move output to " + finalOutputPath);
+          }
+        }
+      }
+    } else if(fs.getFileStatus(file).isDir()) {
+      FileStatus[] children = fs.listStatus(file);
+      // Find the first child that is not one of the skipped bookkeeping entries.
+      FileStatus firstChild = null;
+      if (children != null) {
+        int index=0;
+        while (index < children.length) {
+          if (!children[index].getPath().getName().equals(TEMP_DIR_NAME) && !children[index].getPath().getName().equals(LOGS_DIR_NAME) && !children[index].getPath().getName().equals(SUCCEEDED_FILE_NAME)) {
+            firstChild = children[index];
+            break;
+          }
+          index++;
+        }
+      }
+      if(firstChild!=null && firstChild.isDir()) {
+        // If the first child is directory, then rest would be directory too according to HCatalog dir structure
+        // recurse in that case
+        for (FileStatus child : children) {
+          moveTaskOutputs(fs, child.getPath(), srcDir, destDir, dryRun);
+        }
+      } else {
+
+        if (!dryRun) {
+          if (dynamicPartitioningUsed) {
+            // Optimization: if the first child is file, we have reached the leaf directory, move the parent directory itself
+            // instead of moving each file under the directory. See HCATALOG-538
+
+            final Path parentDir = finalOutputPath.getParent();
+            // Create the directory
+            Path placeholder = new Path(parentDir, "_placeholder");
+            if (fs.mkdirs(parentDir)) {
+              // It is weird but we need a placeholder,
+              // otherwise rename cannot move file to the right place
+              fs.create(placeholder).close();
+            }
+            if (LOG.isDebugEnabled()) {
+              LOG.debug("Moving directory: " + file + " to " + parentDir);
+            }
+            if (!fs.rename(file, parentDir)) {
+              final String msg = "Failed to move file: " + file + " to " + parentDir;
+              LOG.error(msg);
+              throw new HCatException(ErrorType.ERROR_MOVE_FAILED, msg);
+            }
+            fs.delete(placeholder, false);
+          } else {
+            // In case of no partition we have to move each file
+            for (FileStatus child : children) {
+              moveTaskOutputs(fs, child.getPath(), srcDir, destDir, dryRun);
+            }
+          }
+        } else {
+          if(fs.exists(finalOutputPath)) {
+            throw new HCatException(ErrorType.ERROR_MOVE_FAILED, "Data already exists in " + finalOutputPath
+              + ", duplicate publish not possible.");
+          }
+        }
+      }
+    } else {
+      // Should never happen
+      final String msg = "Unknown file type being asked to be moved, erroring out";
+      throw new HCatException(ErrorType.ERROR_MOVE_FAILED, msg);
+    }
+  }
+
+  /**
+   * Find the final name of a given output file, given the output directory
+   * and the work directory.
+ * @param file the file to move + * @param src the source directory + * @param dest the target directory + * @return the final path for the specific output file + * @throws java.io.IOException + */ + private Path getFinalPath(Path file, Path src, + Path dest) throws IOException { + URI taskOutputUri = file.toUri(); + URI relativePath = src.toUri().relativize(taskOutputUri); + if (taskOutputUri == relativePath) { + throw new HCatException(ErrorType.ERROR_MOVE_FAILED, "Can not get the relative path: base = " + + src + " child = " + file); + } + if (relativePath.getPath().length() > 0) { + return new Path(dest, relativePath.getPath()); + } else { + return dest; + } + } + + /** + * Run to discover dynamic partitions available + */ + private void discoverPartitions(JobContext context) throws IOException { + if (!partitionsDiscovered) { + // LOG.info("discover ptns called"); + OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(context); + + harProcessor.setEnabled(jobInfo.getHarRequested()); + + List dynamicPartCols = jobInfo.getPosOfDynPartCols(); + int maxDynamicPartitions = jobInfo.getMaxDynamicPartitions(); + + Path loadPath = new Path(jobInfo.getLocation()); + FileSystem fs = loadPath.getFileSystem(context.getConfiguration()); + + // construct a path pattern (e.g., /*/*) to find all dynamically generated paths + String dynPathSpec = loadPath.toUri().getPath(); + dynPathSpec = dynPathSpec.replaceAll("__HIVE_DEFAULT_PARTITION__", "*"); + + // LOG.info("Searching for "+dynPathSpec); + Path pathPattern = new Path(dynPathSpec); + FileStatus[] status = fs.globStatus(pathPattern); + + partitionsDiscoveredByPath = new LinkedHashMap>(); + contextDiscoveredByPath = new LinkedHashMap(); + + + if (status.length == 0) { + // LOG.warn("No partition found genereated by dynamic partitioning in [" + // +loadPath+"] with depth["+jobInfo.getTable().getPartitionKeysSize() + // +"], dynSpec["+dynPathSpec+"]"); + } else { + if ((maxDynamicPartitions != -1) && (status.length > 
maxDynamicPartitions)) { + this.partitionsDiscovered = true; + throw new HCatException(ErrorType.ERROR_TOO_MANY_DYNAMIC_PTNS, + "Number of dynamic partitions being created " + + "exceeds configured max allowable partitions[" + + maxDynamicPartitions + + "], increase parameter [" + + HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTS.varname + + "] if needed."); + } + + for (FileStatus st : status) { + LinkedHashMap fullPartSpec = new LinkedHashMap(); + Warehouse.makeSpecFromName(fullPartSpec, st.getPath()); + partitionsDiscoveredByPath.put(st.getPath().toString(), fullPartSpec); + JobConf jobConf = (JobConf)context.getConfiguration(); + JobContext currContext = HCatMapRedUtil.createJobContext( + jobConf, + context.getJobID(), + InternalUtil.createReporter(HCatMapRedUtil.createTaskAttemptContext(jobConf, + ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptID()))); + HCatOutputFormat.configureOutputStorageHandler(currContext, jobInfo, fullPartSpec); + contextDiscoveredByPath.put(st.getPath().toString(), currContext); + } + } + + // for (Entry> spec : partitionsDiscoveredByPath.entrySet()){ + // LOG.info("Partition "+ spec.getKey()); + // for (Entry e : spec.getValue().entrySet()){ + // LOG.info(e.getKey() + "=>" +e.getValue()); + // } + // } + + this.partitionsDiscovered = true; + } + } + + private void registerPartitions(JobContext context) throws IOException{ + if (dynamicPartitioningUsed){ + discoverPartitions(context); + } + OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(context); + Configuration conf = context.getConfiguration(); + Table table = new Table(jobInfo.getTableInfo().getTable()); + Path tblPath = new Path(table.getTTable().getSd().getLocation()); + FileSystem fs = tblPath.getFileSystem(conf); + + if( table.getPartitionKeys().size() == 0 ) { + //Move data from temp directory the actual table directory + //No metastore operation required. 
+ Path src = new Path(jobInfo.getLocation()); + moveTaskOutputs(fs, src, src, tblPath, false); + fs.delete(src, true); + return; + } + + HiveMetaStoreClient client = null; + HCatTableInfo tableInfo = jobInfo.getTableInfo(); + List partitionsAdded = new ArrayList(); + try { + HiveConf hiveConf = HCatUtil.getHiveConf(conf); + client = HCatUtil.getHiveClient(hiveConf); + StorerInfo storer = InternalUtil.extractStorerInfo(table.getTTable().getSd(),table.getParameters()); + + FileStatus tblStat = fs.getFileStatus(tblPath); + String grpName = tblStat.getGroup(); + FsPermission perms = tblStat.getPermission(); + + List partitionsToAdd = new ArrayList(); + if (!dynamicPartitioningUsed){ + partitionsToAdd.add( + constructPartition( + context,jobInfo, + tblPath.toString(), jobInfo.getPartitionValues() + ,jobInfo.getOutputSchema(), getStorerParameterMap(storer) + ,table, fs + ,grpName,perms)); + }else{ + for (Entry> entry : partitionsDiscoveredByPath.entrySet()){ + partitionsToAdd.add( + constructPartition( + context,jobInfo, + getPartitionRootLocation(entry.getKey(),entry.getValue().size()), entry.getValue() + ,jobInfo.getOutputSchema(), getStorerParameterMap(storer) + ,table, fs + ,grpName,perms)); + } + } + + ArrayList> ptnInfos = new ArrayList>(); + for(Partition ptn : partitionsToAdd){ + ptnInfos.add(InternalUtil.createPtnKeyValueMap(new Table(tableInfo.getTable()), ptn)); + } + + //Publish the new partition(s) + if (dynamicPartitioningUsed && harProcessor.isEnabled() && (!partitionsToAdd.isEmpty())){ + + Path src = new Path(ptnRootLocation); + // check here for each dir we're copying out, to see if it + // already exists, error out if so + moveTaskOutputs(fs, src, src, tblPath, true); + moveTaskOutputs(fs, src, src, tblPath, false); + fs.delete(src, true); + try { + updateTableSchema(client, table, jobInfo.getOutputSchema()); + LOG.info("HAR is being used. 
The table {} has new partitions {}.", table.getTableName(), ptnInfos); + client.add_partitions(partitionsToAdd); + partitionsAdded = partitionsToAdd; + } catch (Exception e){ + // There was an error adding partitions : rollback fs copy and rethrow + for (Partition p : partitionsToAdd){ + Path ptnPath = new Path(harProcessor.getParentFSPath(new Path(p.getSd().getLocation()))); + if (fs.exists(ptnPath)){ + fs.delete(ptnPath,true); + } + } + throw e; + } + + }else{ + // no harProcessor, regular operation + updateTableSchema(client, table, jobInfo.getOutputSchema()); + LOG.info("HAR not is not being used. The table {} has new partitions {}.", table.getTableName(), ptnInfos); + if (dynamicPartitioningUsed && (partitionsToAdd.size()>0)){ + Path src = new Path(ptnRootLocation); + moveTaskOutputs(fs, src, src, tblPath, true); + moveTaskOutputs(fs, src, src, tblPath, false); + fs.delete(src, true); + } + client.add_partitions(partitionsToAdd); + partitionsAdded = partitionsToAdd; + } + } catch (Exception e) { + if (partitionsAdded.size() > 0) { + try { + // baseCommitter.cleanupJob failed, try to clean up the + // metastore + for (Partition p : partitionsAdded) { + client.dropPartition(tableInfo.getDatabaseName(), + tableInfo.getTableName(), p.getValues()); + } + } catch (Exception te) { + // Keep cause as the original exception + throw new HCatException( + ErrorType.ERROR_PUBLISHING_PARTITION, e); + } + } + if (e instanceof HCatException) { + throw (HCatException) e; + } else { + throw new HCatException(ErrorType.ERROR_PUBLISHING_PARTITION, e); + } + } finally { + HCatUtil.closeHiveClientQuietly(client); + } + } + + private void cancelDelegationTokens(JobContext context) throws IOException{ + LOG.info("Cancelling deletgation token for the job."); + HiveMetaStoreClient client = null; + try { + HiveConf hiveConf = HCatUtil + .getHiveConf(context.getConfiguration()); + client = HCatUtil.getHiveClient(hiveConf); + // cancel the deleg. 
tokens that were acquired for this job now that + // we are done - we should cancel if the tokens were acquired by + // HCatOutputFormat and not if they were supplied by Oozie. + // In the latter case the HCAT_KEY_TOKEN_SIGNATURE property in + // the conf will not be set + String tokenStrForm = client.getTokenStrForm(); + if (tokenStrForm != null + && context.getConfiguration().get( + HCatConstants.HCAT_KEY_TOKEN_SIGNATURE) != null) { + client.cancelDelegationToken(tokenStrForm); + } + } catch (MetaException e) { + LOG.warn("MetaException while cancelling delegation token.", e); + } catch (TException e) { + LOG.warn("TException while cancelling delegation token.", e); + } finally { + HCatUtil.closeHiveClientQuietly(client); + } + } + + +} diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputFormatContainer.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputFormatContainer.java new file mode 100644 index 0000000..fb306d8 --- /dev/null +++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputFormatContainer.java @@ -0,0 +1,252 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hive.hcatalog.mapreduce; + +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.PathFilter; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.serde2.SerDe; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.io.WritableComparable; +import org.apache.hadoop.mapred.FileOutputFormat; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapreduce.JobContext; +import org.apache.hadoop.mapreduce.OutputCommitter; +import org.apache.hadoop.mapreduce.RecordWriter; +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter; +import org.apache.hadoop.util.ReflectionUtils; +import org.apache.hive.hcatalog.common.ErrorType; +import org.apache.hive.hcatalog.common.HCatConstants; +import org.apache.hive.hcatalog.common.HCatException; +import org.apache.hive.hcatalog.common.HCatUtil; +import org.apache.hive.hcatalog.data.HCatRecord; +import org.apache.thrift.TException; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +/** + * File-based storage (ie RCFile, Text, etc) implementation of OutputFormatContainer. + * This implementation supports the following HCatalog features: partitioning, dynamic partitioning, Hadoop Archiving, etc. 
+ */
+class FileOutputFormatContainer extends OutputFormatContainer {
+
+  private static final PathFilter hiddenFileFilter = new PathFilter() { // used when globbing the table directory in handleDuplicatePublish
+    public boolean accept(Path p) {
+      String name = p.getName();
+      return !name.startsWith("_") && !name.startsWith("."); // reject names beginning with "_" or "."
+    }
+  };
+
+  /**
+   * @param of base OutputFormat to contain; passed unchanged to the superclass constructor
+   */
+  public FileOutputFormatContainer(org.apache.hadoop.mapred.OutputFormat, ? super Writable> of) { // NOTE(review): generic type arguments look stripped in this copy - confirm against upstream
+    super(of);
+  }
+
+  @Override
+  public RecordWriter, HCatRecord> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException { // returns a FileRecordWriterContainer in both branches
+    //this needs to be manually set, under normal circumstances MR Task does this
+    setWorkOutputPath(context);
+
+    //Configure the output key and value classes.
+    // This is required for writing null as key for file based tables.
+    context.getConfiguration().set("mapred.output.key.class",
+      NullWritable.class.getName());
+    String jobInfoString = context.getConfiguration().get(
+      HCatConstants.HCAT_KEY_OUTPUT_INFO);
+    OutputJobInfo jobInfo = (OutputJobInfo) HCatUtil
+      .deserialize(jobInfoString);
+    StorerInfo storeInfo = jobInfo.getTableInfo().getStorerInfo();
+    HCatStorageHandler storageHandler = HCatUtil.getStorageHandler(
+      context.getConfiguration(), storeInfo);
+    Class serde = storageHandler.getSerDeClass();
+    SerDe sd = (SerDe) ReflectionUtils.newInstance(serde,
+      context.getConfiguration());
+    context.getConfiguration().set("mapred.output.value.class",
+      sd.getSerializedClass().getName()); // value class is whatever the storage handler's SerDe serializes to
+
+    RecordWriter, HCatRecord> rw;
+    if (HCatBaseOutputFormat.getJobInfo(context).isDynamicPartitioningUsed()){
+      // When Dynamic partitioning is used, the RecordWriter instance initialized here isn't used. Can use null.
+      // (That's because records can't be written until the values of the dynamic partitions are deduced.
+      // By that time, a new local instance of RecordWriter, with the correct output-path, will be constructed.)
+      rw = new FileRecordWriterContainer((org.apache.hadoop.mapred.RecordWriter)null,context);
+    } else {
+      Path parentDir = new Path(context.getConfiguration().get("mapred.work.output.dir")); // set by setWorkOutputPath above when mapred.output.dir is present
+      Path childPath = new Path(parentDir,FileOutputFormat.getUniqueName(new JobConf(context.getConfiguration()), "part"));
+
+      rw = new FileRecordWriterContainer(
+        getBaseOutputFormat().getRecordWriter(
+          parentDir.getFileSystem(context.getConfiguration()),
+          new JobConf(context.getConfiguration()),
+          childPath.toString(),
+          InternalUtil.createReporter(context)),
+        context);
+    }
+    return rw;
+  }
+
+  @Override
+  public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException { // duplicate-publish check first, then the base format's own check
+    OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(context);
+    HiveMetaStoreClient client = null;
+    try {
+      HiveConf hiveConf = HCatUtil.getHiveConf(context.getConfiguration());
+      client = HCatUtil.getHiveClient(hiveConf);
+      handleDuplicatePublish(context,
+        jobInfo,
+        client,
+        new Table(jobInfo.getTableInfo().getTable()));
+    } catch (MetaException e) {
+      throw new IOException(e); // metastore failures are surfaced to the caller as IOException
+    } catch (TException e) {
+      throw new IOException(e);
+    } finally {
+      HCatUtil.closeHiveClientQuietly(client); // client may still be null here if getHiveClient threw
+    }
+
+    if (!jobInfo.isDynamicPartitioningUsed()) { // the base format's check is skipped for dynamic partitioning
+      JobConf jobConf = new JobConf(context.getConfiguration());
+      getBaseOutputFormat().checkOutputSpecs(null, jobConf);
+      //checkoutputspecs might've set some properties we need to have context reflect that
+      HCatUtil.copyConf(jobConf, context.getConfiguration());
+    }
+  }
+
+  @Override
+  public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException, InterruptedException { // wraps the base committer in a FileOutputCommitterContainer
+    //this needs to be manually set, under normal circumstances MR Task does this
+    setWorkOutputPath(context);
+    return new FileOutputCommitterContainer(context,
+      HCatBaseOutputFormat.getJobInfo(context).isDynamicPartitioningUsed() ?
+        null : // no base committer is passed when dynamic partitioning is used
+        new JobConf(context.getConfiguration()).getOutputCommitter());
+  }
+
+  /**
+   * Handles duplicate publish of partition. Fails if partition already exists.
+   * For non partitioned tables, fails if non-hidden files (see hiddenFileFilter) are present in table directory.
+   * For dynamic partitioned publish, does nothing - check would need to be done at recordwriter time
+   * @param context the job; only its Configuration is read, to resolve the table's FileSystem
+   * @param outputInfo the output info (database name, table name, partition values, dynamic-partitioning flag)
+   * @param client the metastore client used to list existing partition names
+   * @param table the table being written to
+   * @throws IOException declared; duplicates are reported via HCatException (ERROR_DUPLICATE_PARTITION or ERROR_NON_EMPTY_TABLE)
+   * @throws org.apache.hadoop.hive.metastore.api.MetaException
+   * @throws org.apache.thrift.TException
+   */
+  private static void handleDuplicatePublish(JobContext context, OutputJobInfo outputInfo,
+      HiveMetaStoreClient client, Table table) throws IOException, MetaException, TException, NoSuchObjectException {
+
+    /*
+     * For fully specified ptn, follow strict checks for existence of partitions in metadata
+     * For unpartitioned tables, follow filechecks
+     * For partially specified tables:
+     *    This would then need filechecks at the start of a ptn write,
+     *    Doing metadata checks can get potentially very expensive (fat conf) if
+     *    there are a large number of partitions that match the partial specifications
+     */
+
+    if (table.getPartitionKeys().size() > 0) {
+      if (!outputInfo.isDynamicPartitioningUsed()) {
+        List partitionValues = getPartitionValueList(
+          table, outputInfo.getPartitionValues());
+        // fully-specified partition
+        List currentParts = client.listPartitionNames(outputInfo.getDatabaseName(),
+          outputInfo.getTableName(), partitionValues, (short) 1); // a single match is enough to detect a duplicate
+
+        if (currentParts.size() > 0) {
+          throw new HCatException(ErrorType.ERROR_DUPLICATE_PARTITION);
+        }
+      }
+    } else {
+      List partitionValues = getPartitionValueList( // result is unused; the call still throws if partition values were supplied
+        table, outputInfo.getPartitionValues());
+      // non-partitioned table
+
+      Path tablePath = new Path(table.getTTable().getSd().getLocation());
+      FileSystem fs = tablePath.getFileSystem(context.getConfiguration());
+
+      if (fs.exists(tablePath)) {
+        FileStatus[] status = fs.globStatus(new Path(tablePath, "*"), hiddenFileFilter);
+
+        if (status.length > 0) {
+          throw new HCatException(ErrorType.ERROR_NON_EMPTY_TABLE,
+            table.getDbName() + "." + table.getTableName());
+        }
+      }
+    }
+  }
+
+  /**
+   * Convert the partition value map to a value list in the partition key order.
+   * @param table the table being written to
+   * @param valueMap the partition value map; values are looked up by lower-cased partition key name
+   * @return the partition value list, one entry per partition key of the table
+   * @throws java.io.IOException declared; HCatException is thrown when the map size differs from the key count or a key has no value
+   */
+  static List getPartitionValueList(Table table, Map valueMap) throws IOException {
+
+    if (valueMap.size() != table.getPartitionKeys().size()) {
+      throw new HCatException(ErrorType.ERROR_INVALID_PARTITION_VALUES,
+        "Table "
+          + table.getTableName() + " has " +
+          table.getPartitionKeys().size() + " partition keys, got " +
+          valueMap.size());
+    }
+
+    List values = new ArrayList();
+
+    for (FieldSchema schema : table.getPartitionKeys()) {
+      String value = valueMap.get(schema.getName().toLowerCase());
+
+      if (value == null) {
+        throw new HCatException(ErrorType.ERROR_MISSING_PARTITION_KEY,
+          "Key " + schema.getName() + " of table " + table.getTableName());
+      }
+
+      values.add(value);
+    }
+
+    return values;
+  }
+
+  static void setWorkOutputPath(TaskAttemptContext context) throws IOException { // copies the committer work path into "mapred.work.output.dir"
+    String outputPath = context.getConfiguration().get("mapred.output.dir");
+    //we need to do this to get the task path and set it for mapred implementation
+    //since it can't be done automatically because of mapreduce->mapred abstraction
+    if (outputPath != null) // nothing is set when no output dir is configured
+      context.getConfiguration().set("mapred.work.output.dir",
+        new FileOutputCommitter(new Path(outputPath), context).getWorkPath().toString());
+  }
+}
diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileRecordWriterContainer.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileRecordWriterContainer.java
new file mode 100644
index 0000000..58c137d
--- /dev/null
+++
hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileRecordWriterContainer.java @@ -0,0 +1,266 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.mapreduce; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.serde2.SerDe; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.io.WritableComparable; +import org.apache.hadoop.mapred.Reporter; +import org.apache.hadoop.mapreduce.JobContext; +import org.apache.hadoop.mapreduce.OutputCommitter; +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter; +import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; +import org.apache.hadoop.util.ReflectionUtils; +import org.apache.hive.hcatalog.common.ErrorType; +import org.apache.hive.hcatalog.common.HCatException; +import 
org.apache.hive.hcatalog.common.HCatUtil;
+import org.apache.hive.hcatalog.data.HCatRecord;
+
+/**
+ * Part of the FileOutput*Container classes: serializes each HCatRecord with a SerDe and hands it to a mapred RecordWriter.
+ * See {@link FileOutputFormatContainer} for more information. With dynamic partitioning one writer, SerDe, committer and context is kept per distinct set of dynamic partition values.
+ */
+class FileRecordWriterContainer extends RecordWriterContainer {
+
+  private final HCatStorageHandler storageHandler;
+  private final SerDe serDe;
+  private final ObjectInspector objectInspector;
+
+  private boolean dynamicPartitioningUsed = false;
+
+  private final Map, ? super Writable>> baseDynamicWriters; // NOTE(review): generic type arguments look stripped in this copy - confirm against upstream
+  private final Map baseDynamicSerDe; // this and the maps below are keyed by dynKey, see write()
+  private final Map baseDynamicCommitters;
+  private final Map dynamicContexts;
+  private final Map dynamicObjectInspectors;
+  private Map dynamicOutputJobInfo;
+
+
+  private final List partColsToDel; // positions removed from every record before it is serialized
+  private final List dynamicPartCols; // positions whose values form the dynamic partition key
+  private int maxDynamicPartitions; // -1 disables the limit check in write()
+
+  private OutputJobInfo jobInfo;
+  private TaskAttemptContext context;
+
+  /**
+   * @param baseWriter RecordWriter to contain; used by write() and close() only when dynamic partitioning is off
+   * @param context current TaskAttemptContext
+   * @throws IOException if the SerDe cannot be initialized; HCatException is thrown if partition column positions are missing from the job info
+   * @throws InterruptedException
+   */
+  public FileRecordWriterContainer(org.apache.hadoop.mapred.RecordWriter, ? super Writable> baseWriter,
+                   TaskAttemptContext context) throws IOException, InterruptedException {
+    super(context, baseWriter);
+    this.context = context;
+    jobInfo = HCatOutputFormat.getJobInfo(context);
+
+    storageHandler = HCatUtil.getStorageHandler(context.getConfiguration(), jobInfo.getTableInfo().getStorerInfo());
+    serDe = ReflectionUtils.newInstance(storageHandler.getSerDeClass(), context.getConfiguration());
+    objectInspector = InternalUtil.createStructObjectInspector(jobInfo.getOutputSchema());
+    try {
+      InternalUtil.initializeOutputSerDe(serDe, context.getConfiguration(), jobInfo);
+    } catch (SerDeException e) {
+      throw new IOException("Failed to initialize SerDe", e);
+    }
+
+    // If partition columns occur in data, we want to remove them.
+    partColsToDel = jobInfo.getPosOfPartCols();
+    dynamicPartitioningUsed = jobInfo.isDynamicPartitioningUsed();
+    dynamicPartCols = jobInfo.getPosOfDynPartCols();
+    maxDynamicPartitions = jobInfo.getMaxDynamicPartitions();
+
+    if ((partColsToDel == null) || (dynamicPartitioningUsed && (dynamicPartCols == null))) {
+      throw new HCatException("It seems that setSchema() is not called on " +
+        "HCatOutputFormat. Please make sure that method is called.");
+    }
+
+
+    if (!dynamicPartitioningUsed) { // per-partition maps are only allocated when dynamic partitioning is on
+      this.baseDynamicSerDe = null;
+      this.baseDynamicWriters = null;
+      this.baseDynamicCommitters = null;
+      this.dynamicContexts = null;
+      this.dynamicObjectInspectors = null;
+      this.dynamicOutputJobInfo = null;
+    } else {
+      this.baseDynamicSerDe = new HashMap();
+      this.baseDynamicWriters = new HashMap, ? super Writable>>();
+      this.baseDynamicCommitters = new HashMap();
+      this.dynamicContexts = new HashMap();
+      this.dynamicObjectInspectors = new HashMap();
+      this.dynamicOutputJobInfo = new HashMap();
+    }
+  }
+
+  /**
+   * @return the storagehandler resolved from the job's StorerInfo in the constructor
+   */
+  public HCatStorageHandler getStorageHandler() {
+    return storageHandler;
+  }
+
+  @Override
+  public void close(TaskAttemptContext context) throws IOException,
+    InterruptedException { // dynamic case: close every per-partition writer, then commit each task that needs it
+    Reporter reporter = InternalUtil.createReporter(context);
+    if (dynamicPartitioningUsed) {
+      for (org.apache.hadoop.mapred.RecordWriter, ? super Writable> bwriter : baseDynamicWriters.values()) {
+        //We are in RecordWriter.close() make sense that the context would be TaskInputOutput
+        bwriter.close(reporter);
+      }
+      for (Map.Entry entry : baseDynamicCommitters.entrySet()) {
+        org.apache.hadoop.mapred.TaskAttemptContext currContext = dynamicContexts.get(entry.getKey());
+        OutputCommitter baseOutputCommitter = entry.getValue();
+        if (baseOutputCommitter.needsTaskCommit(currContext)) {
+          baseOutputCommitter.commitTask(currContext);
+        }
+      }
+    } else {
+      getBaseRecordWriter().close(reporter);
+    }
+  }
+
+  @Override
+  public void write(WritableComparable key, HCatRecord value) throws IOException,
+    InterruptedException { // key is ignored; partition columns are removed from value (the record is mutated) before serializing
+
+    org.apache.hadoop.mapred.RecordWriter localWriter;
+    ObjectInspector localObjectInspector;
+    SerDe localSerDe;
+    OutputJobInfo localJobInfo = null;
+
+    if (dynamicPartitioningUsed) {
+      // calculate which writer to use from the remaining values - this needs to be done before we delete cols
+      List dynamicPartValues = new ArrayList();
+      for (Integer colToAppend : dynamicPartCols) {
+        dynamicPartValues.add(value.get(colToAppend).toString());
+      }
+
+      String dynKey = dynamicPartValues.toString(); // the list's string form identifies the partition in all per-partition maps
+      if (!baseDynamicWriters.containsKey(dynKey)) {
+        if ((maxDynamicPartitions != -1) && (baseDynamicWriters.size() >= maxDynamicPartitions)) { // >= : a writer is about to be added, so stop once the limit is already reached
+          throw new HCatException(ErrorType.ERROR_TOO_MANY_DYNAMIC_PTNS,
+            "Number of dynamic partitions being created "
+              + "exceeds configured max allowable partitions["
+              + maxDynamicPartitions
+              + "], increase parameter ["
+              + HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTS.varname
+              + "] if needed.");
+        }
+
+        org.apache.hadoop.mapred.TaskAttemptContext currTaskContext = HCatMapRedUtil.createTaskAttemptContext(context);
+        configureDynamicStorageHandler(currTaskContext, dynamicPartValues);
+        localJobInfo = HCatBaseOutputFormat.getJobInfo(currTaskContext);
+
+        //setup serDe
+        SerDe currSerDe = ReflectionUtils.newInstance(storageHandler.getSerDeClass(), currTaskContext.getJobConf());
+        try {
+          InternalUtil.initializeOutputSerDe(currSerDe, currTaskContext.getConfiguration(), localJobInfo);
+        } catch (SerDeException e) {
+          throw new IOException("Failed to initialize SerDe", e);
+        }
+
+        //create base OutputFormat
+        org.apache.hadoop.mapred.OutputFormat baseOF =
+          ReflectionUtils.newInstance(storageHandler.getOutputFormatClass(), currTaskContext.getJobConf());
+
+        //We are skipping calling checkOutputSpecs() for each partition
+        //As it can throw a FileAlreadyExistsException when more than one mapper is writing to a partition
+        //See HCATALOG-490, also to avoid contacting the namenode for each new FileOutputFormat instance
+        //In general this should be ok for most FileOutputFormat implementations
+        //but may become an issue for cases when the method is used to perform other setup tasks
+
+        //get Output Committer
+        org.apache.hadoop.mapred.OutputCommitter baseOutputCommitter = currTaskContext.getJobConf().getOutputCommitter();
+        //create currJobContext the latest so it gets all the config changes
+        org.apache.hadoop.mapred.JobContext currJobContext = HCatMapRedUtil.createJobContext(currTaskContext);
+        //setupJob()
+        baseOutputCommitter.setupJob(currJobContext);
+        //recreate to refresh jobConf of currTask context
+        currTaskContext =
+          HCatMapRedUtil.createTaskAttemptContext(currJobContext.getJobConf(),
+            currTaskContext.getTaskAttemptID(),
+            currTaskContext.getProgressible());
+        //set temp location
+        currTaskContext.getConfiguration().set("mapred.work.output.dir",
+          new FileOutputCommitter(new Path(localJobInfo.getLocation()), currTaskContext).getWorkPath().toString());
+        //setupTask()
+        baseOutputCommitter.setupTask(currTaskContext);
+
+        Path parentDir = new Path(currTaskContext.getConfiguration().get("mapred.work.output.dir"));
+        Path childPath = new Path(parentDir,FileOutputFormat.getUniqueFile(currTaskContext, "part", ""));
+
+        org.apache.hadoop.mapred.RecordWriter baseRecordWriter =
+          baseOF.getRecordWriter(
+            parentDir.getFileSystem(currTaskContext.getConfiguration()),
+            currTaskContext.getJobConf(),
+            childPath.toString(),
+            InternalUtil.createReporter(currTaskContext));
+
+        baseDynamicWriters.put(dynKey, baseRecordWriter);
+        baseDynamicSerDe.put(dynKey, currSerDe);
+        baseDynamicCommitters.put(dynKey, baseOutputCommitter);
+        dynamicContexts.put(dynKey, currTaskContext);
+        dynamicObjectInspectors.put(dynKey, InternalUtil.createStructObjectInspector(jobInfo.getOutputSchema()));
+        dynamicOutputJobInfo.put(dynKey, HCatOutputFormat.getJobInfo(dynamicContexts.get(dynKey)));
+      }
+
+      localJobInfo = dynamicOutputJobInfo.get(dynKey);
+      localWriter = baseDynamicWriters.get(dynKey);
+      localSerDe = baseDynamicSerDe.get(dynKey);
+      localObjectInspector = dynamicObjectInspectors.get(dynKey);
+    } else {
+      localJobInfo = jobInfo;
+      localWriter = getBaseRecordWriter();
+      localSerDe = serDe;
+      localObjectInspector = objectInspector;
+    }
+
+    for (Integer colToDel : partColsToDel) {
+      value.remove(colToDel);
+    }
+
+
+    //The key given by user is ignored
+    try {
+      localWriter.write(NullWritable.get(), localSerDe.serialize(value.getAll(), localObjectInspector));
+    } catch (SerDeException e) {
+      throw new IOException("Failed to serialize object", e);
+    }
+  }
+
+  protected void configureDynamicStorageHandler(JobContext context, List dynamicPartVals) throws IOException { // protected hook; delegates to HCatOutputFormat
+    HCatOutputFormat.configureOutputStorageHandler(context, dynamicPartVals);
+  }
+
+}
diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FosterStorageHandler.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FosterStorageHandler.java
new file mode 100644
index 0000000..09739a1
--- /dev/null
+++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FosterStorageHandler.java
@@ -0,0 +1,186 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.mapreduce; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.common.FileUtils; +import org.apache.hadoop.hive.metastore.HiveMetaHook; +import org.apache.hadoop.hive.ql.io.RCFile; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.TableDesc; +import org.apache.hadoop.hive.ql.security.authorization.DefaultHiveAuthorizationProvider; +import org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider; +import org.apache.hadoop.hive.serde2.SerDe; +import org.apache.hadoop.mapred.InputFormat; +import org.apache.hadoop.mapred.OutputFormat; +import org.apache.hive.hcatalog.common.HCatConstants; +import org.apache.hive.hcatalog.common.HCatUtil; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +/** + * This class is used to encapsulate the InputFormat, OutputFormat and SerDe + * artifacts of tables which don't define a SerDe. This StorageHandler assumes + * the supplied storage artifacts are for a file-based storage system. 
+ */
+public class FosterStorageHandler extends HCatStorageHandler {
+
+  public Configuration conf; // read and written through getConf()/setConf(); left public for existing callers
+  /** The directory under which data is initially written for a partitioned table */
+  protected static final String DYNTEMP_DIR_NAME = "_DYN";
+
+  /** The directory under which data is initially written for a non partitioned table */
+  protected static final String TEMP_DIR_NAME = "_TEMP";
+
+  private Class ifClass; // NOTE(review): generic type arguments look stripped in this copy - confirm against upstream
+  private Class ofClass;
+  private Class serDeClass;
+
+  public FosterStorageHandler(String ifName, String ofName, String serdeName) throws ClassNotFoundException { // loads the three classes by name via Class.forName
+    this((Class) Class.forName(ifName),
+      (Class) Class.forName(ofName),
+      (Class) Class.forName(serdeName));
+  }
+
+  public FosterStorageHandler(Class ifClass, // stores the InputFormat, OutputFormat and SerDe classes as given
+                Class ofClass,
+                Class serDeClass) {
+    this.ifClass = ifClass;
+    this.ofClass = ofClass;
+    this.serDeClass = serDeClass;
+  }
+
+  @Override
+  public Class getInputFormatClass() {
+    return ifClass;    // the InputFormat class supplied at construction
+  }
+
+  @Override
+  public Class getOutputFormatClass() {
+    return ofClass;    // the OutputFormat class supplied at construction
+  }
+
+  @Override
+  public Class getSerDeClass() {
+    return serDeClass;  // the SerDe class supplied at construction
+  }
+
+  @Override
+  public HiveMetaHook getMetaHook() {
+    return null; // this handler supplies no metastore hook
+  }
+
+  @Override
+  public void configureInputJobProperties(TableDesc tableDesc,
+                      Map jobProperties) { // intentionally empty: no input-side job properties are set
+
+  }
+
+  @Override
+  public void configureOutputJobProperties(TableDesc tableDesc,
+                       Map jobProperties) { // computes the write location, stores it on the OutputJobInfo and re-serializes that into jobProperties
+    try {
+      OutputJobInfo jobInfo = (OutputJobInfo)
+        HCatUtil.deserialize(tableDesc.getJobProperties().get(
+          HCatConstants.HCAT_KEY_OUTPUT_INFO));
+      String parentPath = jobInfo.getTableInfo().getTableLocation();
+      String dynHash = tableDesc.getJobProperties().get(
+        HCatConstants.HCAT_DYNAMIC_PTN_JOBID);
+
+      // For dynamic partitioned writes without all keyvalues specified,
+      // we create a temp dir for the associated write job
+      if (dynHash != null) {
+        parentPath = new Path(parentPath,
+          DYNTEMP_DIR_NAME + dynHash).toString();
+      }
+
+      String outputLocation;
+
+      if ((dynHash == null)
+        && Boolean.valueOf((String)tableDesc.getProperties().get("EXTERNAL"))
+        && jobInfo.getLocation() != null && jobInfo.getLocation().length() > 0) {
+        // honor custom location for external table apart from what metadata specifies
+        // only if we're not using dynamic partitioning - see HIVE-5011
+        outputLocation = jobInfo.getLocation();
+      } else if (dynHash == null && jobInfo.getPartitionValues().size() == 0) {
+        // For non-partitioned tables, we send them to the temp dir
+        outputLocation = TEMP_DIR_NAME;
+      } else {
+        List cols = new ArrayList();
+        List values = new ArrayList();
+
+        //Get the output location in the order partition keys are defined for the table.
+        for (String name :
+          jobInfo.getTableInfo().
+            getPartitionColumns().getFieldNames()) {
+          String value = jobInfo.getPartitionValues().get(name);
+          cols.add(name);
+          values.add(value);
+        }
+        outputLocation = FileUtils.makePartName(cols, values);
+      }
+
+      jobInfo.setLocation(new Path(parentPath, outputLocation).toString());
+
+      //only set output dir if partition is fully materialized
+      if (jobInfo.getPartitionValues().size()
+        == jobInfo.getTableInfo().getPartitionColumns().size()) {
+        jobProperties.put("mapred.output.dir", jobInfo.getLocation());
+      }
+
+      //TODO find a better home for this, RCFile specific. NOTE(review): presumably read back as a decimal integer - confirm against the RCFile writer
+      jobProperties.put(RCFile.COLUMN_NUMBER_CONF_STR,
+        Integer.toString( // decimal; the octal form differed from the real column count once the schema had 8 or more fields
+          jobInfo.getOutputSchema().getFields().size()));
+      jobProperties.put(HCatConstants.HCAT_KEY_OUTPUT_INFO,
+        HCatUtil.serialize(jobInfo));
+    } catch (IOException e) {
+      throw new IllegalStateException("Failed to set output path", e); // the overridden signature declares no checked exception
+    }
+
+  }
+
+  @Override
+  OutputFormatContainer getOutputFormatContainer(
+    org.apache.hadoop.mapred.OutputFormat outputFormat) { // always the file-based container
+    return new FileOutputFormatContainer(outputFormat);
+  }
+
+  @Override
+  public Configuration getConf() {
+    return conf;
+  }
+
+  @Override
+  public void setConf(Configuration conf) {
+    this.conf = conf;
+  }
+
+  @Override
+  public HiveAuthorizationProvider getAuthorizationProvider()
+    throws HiveException {
+    return new DefaultHiveAuthorizationProvider(); // a new provider instance on every call
+  }
+
+}
diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatBaseInputFormat.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatBaseInputFormat.java
new file mode 100644
index 0000000..0e3938e
--- /dev/null
+++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatBaseInputFormat.java
@@ -0,0 +1,333 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.mapreduce; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.LinkedList; +import java.util.Map; +import java.util.HashMap; +import java.util.List; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.WritableComparable; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapreduce.InputFormat; +import org.apache.hadoop.mapreduce.InputSplit; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.JobContext; +import org.apache.hadoop.mapreduce.RecordReader; +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.hadoop.util.ReflectionUtils; +import org.apache.hadoop.util.StringUtils; + +import org.apache.hive.hcatalog.common.HCatConstants; +import org.apache.hive.hcatalog.common.HCatUtil; +import org.apache.hive.hcatalog.data.HCatRecord; +import org.apache.hive.hcatalog.data.schema.HCatFieldSchema; +import org.apache.hive.hcatalog.data.schema.HCatSchema; + +public abstract class HCatBaseInputFormat + extends InputFormat { + + /** + * get the schema for the HCatRecord data returned by HCatInputFormat. 
+ * + * NOTE(review): this Javadoc describes a schema getter, yet it is attached to the field below, + * which no other code in this class references - it looks like a leftover. + */ + private Class inputFileFormatClass; + + // Uses the schema stored under HCAT_KEY_OUTPUT_SCHEMA, else the full table schema. TODO needs to go in InitializeInput? as part of InputJobInfo + private static HCatSchema getOutputSchema(Configuration conf) + throws IOException { + String os = conf.get(HCatConstants.HCAT_KEY_OUTPUT_SCHEMA); + if (os == null) { + return getTableSchema(conf); + } else { + return (HCatSchema) HCatUtil.deserialize(os); + } + } + + /** + * Set the schema for the HCatRecord data returned by HCatInputFormat. + * @param job the job object + * @param hcatSchema the schema to use as the consolidated schema + */ + public static void setOutputSchema(Job job, HCatSchema hcatSchema) + throws IOException { + job.getConfiguration().set(HCatConstants.HCAT_KEY_OUTPUT_SCHEMA, + HCatUtil.serialize(hcatSchema)); + } + + protected static org.apache.hadoop.mapred.InputFormat + getMapRedInputFormat(JobConf job, Class inputFormatClass) throws IOException { + return ( + org.apache.hadoop.mapred.InputFormat) + ReflectionUtils.newInstance(inputFormatClass, job); + } + + /** + * Logically split the set of input files for the job.
Returns the + * underlying InputFormat's splits + * @param jobContext the job context object + * @return the splits, each an HCatSplit wrapper over one of the storage + * handler's InputSplits + * @throws IOException or InterruptedException + */ + @Override + public List getSplits(JobContext jobContext) + throws IOException, InterruptedException { + Configuration conf = jobContext.getConfiguration(); + + //Get the job info from the configuration, + //throws exception if not initialized + InputJobInfo inputJobInfo; + try { + inputJobInfo = getJobInfo(conf); + } catch (Exception e) { + throw new IOException(e); + } + + List splits = new ArrayList(); + List partitionInfoList = inputJobInfo.getPartitions(); + if (partitionInfoList == null) { + //No partitions match the specified partition filter + return splits; + } + + HCatStorageHandler storageHandler; + JobConf jobConf; + //For each matching partition, call getSplits on the underlying InputFormat + for (PartInfo partitionInfo : partitionInfoList) { + jobConf = HCatUtil.getJobConfFromContext(jobContext); + setInputPath(jobConf, partitionInfo.getLocation()); + Map jobProperties = partitionInfo.getJobProperties(); + + HCatSchema allCols = new HCatSchema(new LinkedList()); + for (HCatFieldSchema field : + inputJobInfo.getTableInfo().getDataColumns().getFields()) + allCols.append(field); + for (HCatFieldSchema field : + inputJobInfo.getTableInfo().getPartitionColumns().getFields()) + allCols.append(field); + + HCatUtil.copyJobPropertiesToJobConf(jobProperties, jobConf); + + storageHandler = HCatUtil.getStorageHandler( + jobConf, partitionInfo); + + //Get the input format + Class inputFormatClass = storageHandler.getInputFormatClass(); + org.apache.hadoop.mapred.InputFormat inputFormat = + getMapRedInputFormat(jobConf, inputFormatClass); + + //Call getSplit on the InputFormat, create an HCatSplit for each + //underlying split. When the desired number of input splits is missing, + //use a default number (denoted by zero).
+ //TODO(malewicz): Currently each partition is split independently into + //a desired number. However, we want the union of all partitions to be + //split into a desired number while maintaining balanced sizes of input + //splits. + int desiredNumSplits = + conf.getInt(HCatConstants.HCAT_DESIRED_PARTITION_NUM_SPLITS, 0); + org.apache.hadoop.mapred.InputSplit[] baseSplits = + inputFormat.getSplits(jobConf, desiredNumSplits); + + for (org.apache.hadoop.mapred.InputSplit split : baseSplits) { + splits.add(new HCatSplit( + partitionInfo, + split, allCols)); + } + } + + return splits; + } + + /** + * Create the RecordReader for the given InputSplit. This implementation always + * returns an HCatRecordReader, built from the storage handler of the split's + * partition and the values of the requested output columns that are absent + * from that partition's data schema. + * @param split the split + * @param taskContext the task attempt context + * @return an HCatRecordReader for the storage handler resolved from the + * split's partition info + * @throws IOException or InterruptedException + */ + @Override + public RecordReader + createRecordReader(InputSplit split, + TaskAttemptContext taskContext) throws IOException, InterruptedException { + + HCatSplit hcatSplit = InternalUtil.castToHCatSplit(split); + PartInfo partitionInfo = hcatSplit.getPartitionInfo(); + JobContext jobContext = taskContext; + Configuration conf = jobContext.getConfiguration(); + + HCatStorageHandler storageHandler = HCatUtil.getStorageHandler( + conf, partitionInfo); + + JobConf jobConf = HCatUtil.getJobConfFromContext(jobContext); + Map jobProperties = partitionInfo.getJobProperties(); + HCatUtil.copyJobPropertiesToJobConf(jobProperties, jobConf); + + Map valuesNotInDataCols = getColValsNotInDataColumns( + getOutputSchema(conf), partitionInfo + ); + + return new HCatRecordReader(storageHandler, valuesNotInDataCols); + } + + + /** + * gets values for fields requested by output schema which
will not be in the data + */ + private static Map getColValsNotInDataColumns(HCatSchema outputSchema, + PartInfo partInfo) { + HCatSchema dataSchema = partInfo.getPartitionSchema(); + Map vals = new HashMap(); + for (String fieldName : outputSchema.getFieldNames()) { + if (dataSchema.getPosition(fieldName) == null) { + // this entry of the output schema is not present in the partition's data schema + // so, use its partition value if it is a partition column, otherwise null + + if (partInfo.getPartitionValues().containsKey(fieldName)) { + vals.put(fieldName, partInfo.getPartitionValues().get(fieldName)); + } else { + vals.put(fieldName, null); + } + } + } + return vals; + } + + /** + * @see org.apache.hive.hcatalog.mapreduce.HCatBaseInputFormat#getTableSchema(org.apache.hadoop.conf.Configuration) + * @deprecated Use {@link #getTableSchema(org.apache.hadoop.conf.Configuration)} + */ + public static HCatSchema getTableSchema(JobContext context) + throws IOException { + return getTableSchema(context.getConfiguration()); + } + + + /** + * Gets the HCatTable schema for the table specified in the HCatInputFormat.setInput call + * on the specified job context. This information is available only after HCatInputFormat.setInput + * has been called for a JobContext. + * @param conf the Configuration object + * @return the table schema: the data columns followed by the partition columns + * @throws IOException if HCatInputFormat.setInput has not been called + * for the current context + */ + public static HCatSchema getTableSchema(Configuration conf) + throws IOException { + InputJobInfo inputJobInfo = getJobInfo(conf); + HCatSchema allCols = new HCatSchema(new LinkedList()); + for (HCatFieldSchema field : + inputJobInfo.getTableInfo().getDataColumns().getFields()) + allCols.append(field); + for (HCatFieldSchema field : + inputJobInfo.getTableInfo().getPartitionColumns().getFields()) + allCols.append(field); + return allCols; + } + + /** + * Gets the InputJobInfo object by reading the Configuration and deserializing + * the string.
If InputJobInfo is not present in the configuration, throws an + * exception since that means HCatInputFormat.setInput has not been called. + * @param conf the Configuration object + * @return the InputJobInfo object + * @throws IOException if no job info is stored under HCAT_KEY_JOB_INFO in the configuration + */ + private static InputJobInfo getJobInfo(Configuration conf) + throws IOException { + String jobString = conf.get( + HCatConstants.HCAT_KEY_JOB_INFO); + if (jobString == null) { + throw new IOException("job information not found in JobContext." + + " HCatInputFormat.setInput() not called?"); + } + + return (InputJobInfo) HCatUtil.deserialize(jobString); + } + + private void setInputPath(JobConf jobConf, String location) + throws IOException { + + // ideally we should just call FileInputFormat.setInputPaths() here - but + // that won't work since FileInputFormat.setInputPaths() needs + // a Job object instead of a JobContext which we are handed here; so split location on commas outside {...} globs and set mapred.input.dir directly + + int length = location.length(); + int curlyOpen = 0; + int pathStart = 0; + boolean globPattern = false; + List pathStrings = new ArrayList(); + + for (int i = 0; i < length; i++) { + char ch = location.charAt(i); + switch (ch) { + case '{': { + curlyOpen++; + if (!globPattern) { + globPattern = true; + } + break; + } + case '}': { + curlyOpen--; + if (curlyOpen == 0 && globPattern) { + globPattern = false; + } + break; + } + case ',': { + if (!globPattern) { + pathStrings.add(location.substring(pathStart, i)); + pathStart = i + 1; + } + break; + } + } + } + pathStrings.add(location.substring(pathStart, length)); + + Path[] paths = StringUtils.stringToPath(pathStrings.toArray(new String[0])); + String separator = ""; + StringBuilder str = new StringBuilder(); + + for (Path path : paths) { + FileSystem fs = path.getFileSystem(jobConf); + final String qualifiedPath = fs.makeQualified(path).toString(); + str.append(separator) + .append(StringUtils.escapeString(qualifiedPath)); + separator = StringUtils.COMMA_STR; + } + + jobConf.set("mapred.input.dir",
str.toString()); + } + +} diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatBaseOutputFormat.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatBaseOutputFormat.java new file mode 100644 index 0000000..23b5c8f --- /dev/null +++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatBaseOutputFormat.java @@ -0,0 +1,243 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ */ + +package org.apache.hive.hcatalog.mapreduce; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.io.WritableComparable; +import org.apache.hadoop.mapreduce.JobContext; +import org.apache.hadoop.mapreduce.OutputFormat; +import org.apache.hadoop.util.ReflectionUtils; +import org.apache.hive.hcatalog.common.ErrorType; +import org.apache.hive.hcatalog.common.HCatConstants; +import org.apache.hive.hcatalog.common.HCatException; +import org.apache.hive.hcatalog.common.HCatUtil; +import org.apache.hive.hcatalog.data.HCatRecord; +import org.apache.hive.hcatalog.data.schema.HCatSchema; + +public abstract class HCatBaseOutputFormat extends OutputFormat, HCatRecord> { + +// static final private Log LOG = LogFactory.getLog(HCatBaseOutputFormat.class); + + /** + * @see org.apache.hive.hcatalog.mapreduce.HCatBaseOutputFormat#getTableSchema(org.apache.hadoop.conf.Configuration) + * @deprecated Use {@link #getTableSchema(org.apache.hadoop.conf.Configuration)} + */ + public static HCatSchema getTableSchema(JobContext context) throws IOException { + return getTableSchema(context.getConfiguration()); + } + + /** + * Gets the table schema for the table specified in the HCatOutputFormat.setOutput call + * on the specified job context. + * @param conf the Configuration object + * @return the data columns of the table, i.e. jobInfo.getTableInfo().getDataColumns() + * @throws IOException if HCatOutputFormat.setOutput has not been called for the passed context + */ + public static HCatSchema getTableSchema(Configuration conf) throws IOException { + OutputJobInfo jobInfo = getJobInfo(conf); + return jobInfo.getTableInfo().getDataColumns(); + } + + /** + * Check for validity of the output-specification for the job.
+ * @param context information about the job + * @throws IOException when output should not be attempted + */ + @Override + public void checkOutputSpecs(JobContext context + ) throws IOException, InterruptedException { + getOutputFormat(context).checkOutputSpecs(context); + } + + /** + * Gets the output format instance: the storage handler's output format wrapped in its OutputFormatContainer. + * @param context the job context + * @return the output format instance + * @throws IOException if the output job info is missing from the configuration + */ + protected OutputFormat, HCatRecord> getOutputFormat(JobContext context) throws IOException { + OutputJobInfo jobInfo = getJobInfo(context); + HCatStorageHandler storageHandler = HCatUtil.getStorageHandler(context.getConfiguration(), jobInfo.getTableInfo().getStorerInfo()); + //why do we need this? + configureOutputStorageHandler(context); + return storageHandler.getOutputFormatContainer(ReflectionUtils.newInstance(storageHandler.getOutputFormatClass(), context.getConfiguration())); + } + + /** + * @see org.apache.hive.hcatalog.mapreduce.HCatBaseOutputFormat#getJobInfo(org.apache.hadoop.conf.Configuration) + * @deprecated use {@link #getJobInfo(org.apache.hadoop.conf.Configuration)} + */ + public static OutputJobInfo getJobInfo(JobContext jobContext) throws IOException { + return getJobInfo(jobContext.getConfiguration()); + } + + /** + * Gets the OutputJobInfo object by reading the Configuration and deserializing + * the string. If OutputJobInfo is not present in the configuration, throws an + * exception since that means HCatOutputFormat.setOutput has not been called.
+ * @param conf the job Configuration object + * @return the OutputJobInfo object + * @throws IOException an HCatException with ERROR_NOT_INITIALIZED if no output job info is in the configuration + */ + public static OutputJobInfo getJobInfo(Configuration conf) throws IOException { + String jobString = conf.get(HCatConstants.HCAT_KEY_OUTPUT_INFO); + if (jobString == null) { + throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED); + } + + return (OutputJobInfo) HCatUtil.deserialize(jobString); + } + + /** + * Configure the output storage handler without any dynamic partition values + * @param jobContext the job context + * @throws IOException if the storage handler cannot be configured + */ + @SuppressWarnings("unchecked") + static void configureOutputStorageHandler( + JobContext jobContext) throws IOException { + configureOutputStorageHandler(jobContext, (List) null); + } + + /** + * Configure the output storage handler, allowing the missing dynamic partition values to be specified + * @param jobContext the job context + * @param dynamicPartVals values for the dynamic partitioning keys, in the order of those keys; null when none are supplied + * @throws IOException an HCatException if the number of values differs from the number of keys or the storage handler cannot be configured + */ + @SuppressWarnings("unchecked") + static void configureOutputStorageHandler( + JobContext jobContext, List dynamicPartVals) throws IOException { + Configuration conf = jobContext.getConfiguration(); + try { + OutputJobInfo jobInfo = (OutputJobInfo) HCatUtil.deserialize(conf.get(HCatConstants.HCAT_KEY_OUTPUT_INFO)); + HCatStorageHandler storageHandler = HCatUtil.getStorageHandler(conf, jobInfo.getTableInfo().getStorerInfo()); + + Map partitionValues = jobInfo.getPartitionValues(); + String location = jobInfo.getLocation(); + + if (dynamicPartVals != null) { + // dynamic part vals specified + List dynamicPartKeys = jobInfo.getDynamicPartitioningKeys(); + if (dynamicPartVals.size() != dynamicPartKeys.size()) { + throw new HCatException(ErrorType.ERROR_INVALID_PARTITION_VALUES, + "Unable to configure dynamic partitioning for storage handler, mismatch between" + + " number of partition values obtained[" + dynamicPartVals.size() + + "] and number of partition values required[" + dynamicPartKeys.size() + "]"); + } + for (int i = 0; i <
dynamicPartKeys.size(); i++) { + partitionValues.put(dynamicPartKeys.get(i), dynamicPartVals.get(i)); + } + +// // re-home location, now that we know the rest of the partvals +// Table table = jobInfo.getTableInfo().getTable(); +// +// List partitionCols = new ArrayList(); +// for(FieldSchema schema : table.getPartitionKeys()) { +// partitionCols.add(schema.getName()); +// } + jobInfo.setPartitionValues(partitionValues); + } + + HCatUtil.configureOutputStorageHandler(storageHandler, conf, jobInfo); + } catch (Exception e) { + if (e instanceof HCatException) { + throw (HCatException) e; + } else { + throw new HCatException(ErrorType.ERROR_INIT_STORAGE_HANDLER, e); + } + } + } + + /** + * Configure the output storage handler, taking the dynamic partition values + * from a full partition specification + * @param context the job context + * @param jobInfo the output job info + * @param fullPartSpec map from partition key to value; the values of the dynamic partitioning keys are read from it + * @throws IOException if the storage handler cannot be configured + */ + + protected static void configureOutputStorageHandler( + JobContext context, OutputJobInfo jobInfo, + Map fullPartSpec) throws IOException { + List dynamicPartKeys = jobInfo.getDynamicPartitioningKeys(); + if ((dynamicPartKeys == null) || (dynamicPartKeys.isEmpty())) { + configureOutputStorageHandler(context, (List) null); + } else { + List dynKeyVals = new ArrayList(); + for (String dynamicPartKey : dynamicPartKeys) { + dynKeyVals.add(fullPartSpec.get(dynamicPartKey)); + } + configureOutputStorageHandler(context, dynKeyVals); + } + } + + + protected static void setPartDetails(OutputJobInfo jobInfo, final HCatSchema schema, + Map partMap) throws HCatException, IOException { + List posOfPartCols = new ArrayList(); + List posOfDynPartCols = new ArrayList(); + + // If partition columns occur in data, we want to remove them. + // So, find out positions of partition columns in schema provided by user. + // We also need to update the output Schema with these deletions.
+ + // Note that output storage handlers never see partition columns in data + // or schema. + + HCatSchema schemaWithoutParts = new HCatSchema(schema.getFields()); + for (String partKey : partMap.keySet()) { + Integer idx; + if ((idx = schema.getPosition(partKey)) != null) { + posOfPartCols.add(idx); + schemaWithoutParts.remove(schema.get(partKey)); + } + } + + // Also, if dynamic partitioning is being used, we want to + // set appropriate list of columns for the columns to be dynamically specified. + // These would be partition keys too, so would also need to be removed from + // output schema and partcols + + if (jobInfo.isDynamicPartitioningUsed()) { + for (String partKey : jobInfo.getDynamicPartitioningKeys()) { + Integer idx; + if ((idx = schema.getPosition(partKey)) != null) { + posOfPartCols.add(idx); + posOfDynPartCols.add(idx); + schemaWithoutParts.remove(schema.get(partKey)); + } + } + } + + HCatUtil.validatePartitionSchema( + new Table(jobInfo.getTableInfo().getTable()), schemaWithoutParts); + jobInfo.setPosOfPartCols(posOfPartCols); + jobInfo.setPosOfDynPartCols(posOfDynPartCols); + jobInfo.setOutputSchema(schemaWithoutParts); + } +} diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatEximInputFormat.java.broken hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatEximInputFormat.java.broken new file mode 100644 index 0000000..71b9652 --- /dev/null +++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatEximInputFormat.java.broken @@ -0,0 +1,141 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hcatalog.mapreduce; + +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; +import java.util.ArrayList; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Properties; + +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.Partition; +import org.apache.hadoop.hive.ql.parse.EximUtil; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hcatalog.common.HCatConstants; +import org.apache.hcatalog.common.HCatUtil; +import org.apache.hcatalog.data.schema.HCatSchema; +import org.apache.hcatalog.data.schema.HCatSchemaUtils; + +/** The InputFormat to use to read data from HCat */ +public class HCatEximInputFormat extends HCatBaseInputFormat { + + /** + * Set the input to use for the Job. This queries the metadata file with + * the specified partition predicates, gets the matching partitions, puts + * the information in the conf object. 
The inputInfo object is updated with + * information needed in the client context + * + * @param job the job object + * @return two hcat schemas, for the table columns and the partition keys + * @throws IOException + * if location is not a valid URI, the _metadata file cannot be read, or no input storage driver class is recorded + */ + public static List setInput(Job job, + String location, + Map partitionFilter) throws IOException { + FileSystem fs; + try { + fs = FileSystem.get(new URI(location), job.getConfiguration()); + } catch (URISyntaxException e) { + throw new IOException(e); + } + Path fromPath = new Path(location); + Path metadataPath = new Path(fromPath, "_metadata"); + try { + Map.Entry> tp = EximUtil + .readMetaData(fs, metadataPath); + org.apache.hadoop.hive.metastore.api.Table table = tp.getKey(); + InputJobInfo inputInfo = InputJobInfo.create(table.getDbName(), table.getTableName(),null,null,null); + List partCols = table.getPartitionKeys(); + List partInfoList = null; + if (partCols.size() > 0) { + List partColNames = new ArrayList(partCols.size()); + for (FieldSchema fsc : partCols) { + partColNames.add(fsc.getName()); + } + List partitions = tp.getValue(); + partInfoList = filterPartitions(partitionFilter, partitions, table.getPartitionKeys()); + } else { + partInfoList = new ArrayList(1); + HCatSchema schema = new HCatSchema(HCatUtil.getHCatFieldSchemaList(table.getSd().getCols())); + Map parameters = table.getParameters(); + String inputStorageDriverClass = null; + if (parameters.containsKey(HCatConstants.HCAT_ISD_CLASS)){ + inputStorageDriverClass = parameters.get(HCatConstants.HCAT_ISD_CLASS); + }else{ + throw new IOException("No input storage driver classname found, cannot read partition"); + } + Properties hcatProperties = new Properties(); + for (String key : parameters.keySet()){ + if (key.startsWith(InitializeInput.HCAT_KEY_PREFIX)){ + hcatProperties.put(key, parameters.get(key)); + } + } + PartInfo partInfo = new PartInfo(schema, inputStorageDriverClass, location + "/data", hcatProperties);
+ partInfoList.add(partInfo); + } + inputInfo.setPartitions(partInfoList); + inputInfo.setTableInfo(HCatTableInfo.valueOf(table)); + job.getConfiguration().set( + HCatConstants.HCAT_KEY_JOB_INFO, + HCatUtil.serialize(inputInfo)); + List rv = new ArrayList(2); + rv.add(HCatSchemaUtils.getHCatSchema(table.getSd().getCols())); + rv.add(HCatSchemaUtils.getHCatSchema(partCols)); + return rv; + } catch(SemanticException e) { + throw new IOException(e); + } + } + + private static List filterPartitions(Map partitionFilter, + List partitions, List partCols) throws IOException { + List partInfos = new LinkedList(); + for (Partition partition : partitions) { + boolean matches = true; + List partVals = partition.getValues(); + assert partCols.size() == partVals.size(); + Map partSpec = EximUtil.makePartSpec(partCols, partVals); + if (partitionFilter != null) { + for (Map.Entry constraint : partitionFilter.entrySet()) { + String partVal = partSpec.get(constraint.getKey()); + if ((partVal == null) || !partVal.equals(constraint.getValue())) { + matches = false; + break; + } + } + } + if (matches) { + PartInfo partInfo = InitializeInput.extractPartInfo(partition.getSd(), + partition.getParameters()); + partInfo.setPartitionValues(partSpec); + partInfos.add(partInfo); + } + } + return partInfos; + } +} diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatEximOutputCommitter.java.broken hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatEximOutputCommitter.java.broken new file mode 100644 index 0000000..0ab8c22 --- /dev/null +++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatEximOutputCommitter.java.broken @@ -0,0 +1,166 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hcatalog.mapreduce; + +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.metastore.Warehouse; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.Partition; +import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.parse.EximUtil; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.mapreduce.JobContext; +import org.apache.hadoop.mapreduce.JobStatus; +import org.apache.hadoop.mapreduce.OutputCommitter; +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.hcatalog.common.ErrorType; +import org.apache.hcatalog.common.HCatException; + +public class HCatEximOutputCommitter extends OutputCommitter { + + private static final Log LOG = LogFactory.getLog(HCatEximOutputCommitter.class); + + private final OutputCommitter baseCommitter; + + public 
HCatEximOutputCommitter(JobContext context, OutputCommitter baseCommitter) { + this.baseCommitter = baseCommitter; + } + + @Override + public void abortTask(TaskAttemptContext context) throws IOException { + baseCommitter.abortTask(context); // NOTE(review): baseCommitter is dereferenced without a null check here and in commitTask, needsTaskCommit and setupTask, while the job-level methods guard against null + } + + @Override + public void commitTask(TaskAttemptContext context) throws IOException { + baseCommitter.commitTask(context); + } + + @Override + public boolean needsTaskCommit(TaskAttemptContext context) throws IOException { + return baseCommitter.needsTaskCommit(context); + } + + @Override + public void setupJob(JobContext context) throws IOException { + if( baseCommitter != null ) { + baseCommitter.setupJob(context); + } + } + + @Override + public void setupTask(TaskAttemptContext context) throws IOException { + baseCommitter.setupTask(context); + } + + @Override + public void abortJob(JobContext jobContext, JobStatus.State state) throws IOException { + if(baseCommitter != null) { + baseCommitter.abortJob(jobContext, state); + } + OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(jobContext); + + Path src = new Path(jobInfo.getLocation()); + FileSystem fs = src.getFileSystem(jobContext.getConfiguration()); + fs.delete(src, true); + } + + @Override + public void commitJob(JobContext jobContext) throws IOException { + if(baseCommitter != null) { + baseCommitter.commitJob(jobContext); + } + } + + @Override + public void cleanupJob(JobContext jobContext) throws IOException { + LOG.info("HCatEximOutputCommitter.cleanup invoked; m.o.d : " + + jobContext.getConfiguration().get("mapred.output.dir")); + if (baseCommitter != null) { + LOG.info("baseCommitter.class = " + baseCommitter.getClass().getName()); + baseCommitter.cleanupJob(jobContext); + } + + OutputJobInfo jobInfo = HCatBaseOutputFormat.getJobInfo(jobContext); + Configuration conf = jobContext.getConfiguration(); + FileSystem fs; + try { + fs = FileSystem.get(new URI(jobInfo.getTableInfo().getTable().getSd().getLocation()), conf); + } catch (URISyntaxException e) { +
throw new IOException(e); + } + doCleanup(jobInfo, fs); + } + + private static void doCleanup(OutputJobInfo jobInfo, FileSystem fs) throws IOException, + HCatException { + try { + Table ttable = jobInfo.getTableInfo().getTable(); + org.apache.hadoop.hive.ql.metadata.Table table = new org.apache.hadoop.hive.ql.metadata.Table( + ttable); + StorageDescriptor tblSD = ttable.getSd(); + Path tblPath = new Path(tblSD.getLocation()); + Path path = new Path(tblPath, "_metadata"); + List tpartitions = null; + try { + Map.Entry> rv = EximUtil + .readMetaData(fs, path); + tpartitions = rv.getValue(); + } catch (IOException e) { // ignored: tpartitions stays null, so no previously recorded partitions are re-exported - NOTE(review): confirm this is intentional + } + List partitions = + new ArrayList(); + if (tpartitions != null) { + for (Partition tpartition : tpartitions) { + partitions.add(new org.apache.hadoop.hive.ql.metadata.Partition(table, tpartition)); + } + } + if (!table.getPartitionKeys().isEmpty()) { + Map partitionValues = jobInfo.getPartitionValues(); + org.apache.hadoop.hive.ql.metadata.Partition partition = + new org.apache.hadoop.hive.ql.metadata.Partition(table, + partitionValues, + new Path(tblPath, Warehouse.makePartPath(partitionValues))); + partition.getTPartition().setParameters(table.getParameters()); + partitions.add(partition); + } + EximUtil.createExportDump(fs, path, (table), partitions); + } catch (SemanticException e) { + throw new HCatException(ErrorType.ERROR_PUBLISHING_PARTITION, e); + } catch (HiveException e) { + throw new HCatException(ErrorType.ERROR_PUBLISHING_PARTITION, e); + } catch (MetaException e) { + throw new HCatException(ErrorType.ERROR_PUBLISHING_PARTITION, e); + } + } +} diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatEximOutputFormat.java.broken hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatEximOutputFormat.java.broken new file mode 100644 index 0000000..6181284 --- /dev/null +++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatEximOutputFormat.java.broken @@ -0,0 +1,176 @@ +/** + * Licensed
to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hcatalog.mapreduce; + +import java.io.IOException; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import java.util.TreeMap; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.metastore.Warehouse; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.SerDeInfo; +import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.ql.io.RCFileInputFormat; +import org.apache.hadoop.hive.ql.io.RCFileOutputFormat; +import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe; +import org.apache.hadoop.io.WritableComparable; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.JobContext; +import org.apache.hadoop.mapreduce.OutputCommitter; +import org.apache.hadoop.mapreduce.RecordWriter; +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.hcatalog.common.ErrorType; +import 
org.apache.hcatalog.common.HCatConstants; +import org.apache.hcatalog.common.HCatException; +import org.apache.hcatalog.common.HCatUtil; +import org.apache.hcatalog.data.HCatRecord; +import org.apache.hcatalog.data.schema.HCatFieldSchema; +import org.apache.hcatalog.data.schema.HCatSchema; +import org.apache.hcatalog.data.schema.HCatSchemaUtils; +import org.apache.hcatalog.rcfile.RCFileInputDriver; +import org.apache.hcatalog.rcfile.RCFileOutputDriver; + +/** + * The OutputFormat to use to write data to HCat without a hcat server. This can then + * be imported into a hcat instance, or used with a HCatEximInputFormat. As in + * HCatOutputFormat, the key value is ignored and + * and should be given as null. The value is the HCatRecord to write. + */ +public class HCatEximOutputFormat extends HCatBaseOutputFormat { + + private static final Log LOG = LogFactory.getLog(HCatEximOutputFormat.class); + + /** + * Get the record writer for the job. Uses the Table's default OutputStorageDriver + * to get the record writer. + * + * @param context + * the information about the current task. + * @return a RecordWriter to write the output for the job. + * @throws IOException + */ + @Override + public RecordWriter, HCatRecord> + getRecordWriter(TaskAttemptContext context + ) throws IOException, InterruptedException { + return getOutputFormat(context).getRecordWriter(context); + } + + /** + * Get the output committer for this output format. This is responsible + * for ensuring the output is committed correctly. 
+ * @param context the task context + * @return an output committer + * @throws IOException + * @throws InterruptedException + */ + @Override + public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException, InterruptedException { + return new HCatEximOutputCommitter(context,((OutputCommitterContainer)getOutputFormat(context).getOutputCommitter(context)).getBaseOutputCommitter()); + } + + /** + * Check for validity of the output-specification for the job. + * @param context information about the job + * @throws IOException when output should not be attempted + */ + @Override + public void checkOutputSpecs(JobContext context + ) throws IOException, InterruptedException { + ((OutputFormatContainer)getOutputFormat(context)).getBaseOutputFormat().checkOutputSpecs(context); + } + + public static void setOutput(Job job, String dbname, String tablename, String location, + HCatSchema partitionSchema, List partitionValues, HCatSchema columnSchema) throws HCatException { + setOutput(job, dbname, tablename, location, partitionSchema, partitionValues, columnSchema, + RCFileInputDriver.class.getName(), + RCFileOutputDriver.class.getName(), + RCFileInputFormat.class.getName(), + RCFileOutputFormat.class.getName(), + ColumnarSerDe.class.getName()); + } + + @SuppressWarnings("unchecked") + public static void setOutput(Job job, String dbname, String tablename, String location, + HCatSchema partitionSchema, + List partitionValues, + HCatSchema columnSchema, + String isdname, String osdname, + String ifname, String ofname, + String serializationLib) throws HCatException { + Map partSpec = new TreeMap(); + List partKeys = null; + if (partitionSchema != null) { + partKeys = partitionSchema.getFields(); + if (partKeys.size() != partitionValues.size()) { + throw new IllegalArgumentException("Partition key size differs from partition value size"); + } + for (int i = 0; i < partKeys.size(); ++i) { + HCatFieldSchema partKey = partKeys.get(i); + if 
(partKey.getType() != HCatFieldSchema.Type.STRING) { + throw new IllegalArgumentException("Partition key type string is only supported"); + } + partSpec.put(partKey.getName(), partitionValues.get(i)); + } + } + StorerInfo storerInfo = new StorerInfo(isdname, osdname, new Properties()); + OutputJobInfo outputJobInfo = OutputJobInfo.create(dbname,tablename,partSpec,null,null); + org.apache.hadoop.hive.ql.metadata.Table tbl = new + org.apache.hadoop.hive.ql.metadata.Table(dbname, tablename); + Table table = tbl.getTTable(); + table.getParameters().put(HCatConstants.HCAT_ISD_CLASS, isdname); + table.getParameters().put(HCatConstants.HCAT_OSD_CLASS, osdname); + try { + String partname = null; + if ((partKeys != null) && !partKeys.isEmpty()) { + List partSchema = HCatSchemaUtils.getFieldSchemas(partKeys); + table.setPartitionKeys(partSchema); + partname = Warehouse.makePartName(partSchema, partitionValues); + } else { + partname = "data"; + } + StorageDescriptor sd = table.getSd(); + sd.setLocation(location); + String dataLocation = location + "/" + partname; + outputJobInfo.setTableInfo(new HCatTableInfo(dbname,tablename,columnSchema,null,storerInfo,table)); + outputJobInfo.setOutputSchema(columnSchema); + outputJobInfo.setLocation(dataLocation); + setPartDetails(outputJobInfo, columnSchema, partSpec); + sd.setCols(HCatUtil.getFieldSchemaList(outputJobInfo.getOutputSchema().getFields())); + sd.setInputFormat(ifname); + sd.setOutputFormat(ofname); + SerDeInfo serdeInfo = sd.getSerdeInfo(); + serdeInfo.setSerializationLib(serializationLib); + Configuration conf = job.getConfiguration(); + conf.set(HCatConstants.HCAT_KEY_OUTPUT_INFO, HCatUtil.serialize(outputJobInfo)); + } catch (IOException e) { + throw new HCatException(ErrorType.ERROR_SET_OUTPUT, e); + } catch (MetaException e) { + throw new HCatException(ErrorType.ERROR_SET_OUTPUT, e); + } + } +} diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatInputFormat.java 
hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatInputFormat.java new file mode 100644 index 0000000..0bcb133 --- /dev/null +++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatInputFormat.java @@ -0,0 +1,136 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.mapreduce; + +import java.io.IOException; +import java.util.Properties; + +import com.google.common.base.Preconditions; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.mapreduce.Job; + +/** + * The InputFormat to use to read data from HCatalog. 
+ */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class HCatInputFormat extends HCatBaseInputFormat { + + private Configuration conf; + private InputJobInfo inputJobInfo; + + /** + * @deprecated as of release 0.5, and will be removed in a future release + */ + @Deprecated + public static void setInput(Job job, InputJobInfo inputJobInfo) throws IOException { + setInput(job.getConfiguration(), inputJobInfo); + } + + /** + * @deprecated as of release 0.5, and will be removed in a future release + */ + @Deprecated + public static void setInput(Configuration conf, InputJobInfo inputJobInfo) throws IOException { + setInput(conf, inputJobInfo.getDatabaseName(), inputJobInfo.getTableName()) + .setFilter(inputJobInfo.getFilter()) + .setProperties(inputJobInfo.getProperties()); + } + + /** + * See {@link #setInput(org.apache.hadoop.conf.Configuration, String, String)} + */ + public static HCatInputFormat setInput(Job job, String dbName, String tableName) throws IOException { + return setInput(job.getConfiguration(), dbName, tableName); + } + + /** + * Set inputs to use for the job. This queries the metastore with the given input + * specification and serializes matching partitions into the job conf for use by MR tasks. 
+ * @param conf the job configuration + * @param dbName database name, which if null 'default' is used + * @param tableName table name + * @throws IOException on all errors + */ + public static HCatInputFormat setInput(Configuration conf, String dbName, String tableName) + throws IOException { + + Preconditions.checkNotNull(conf, "required argument 'conf' is null"); + Preconditions.checkNotNull(tableName, "required argument 'tableName' is null"); + + HCatInputFormat hCatInputFormat = new HCatInputFormat(); + hCatInputFormat.conf = conf; + hCatInputFormat.inputJobInfo = InputJobInfo.create(dbName, tableName, null, null); + + try { + InitializeInput.setInput(conf, hCatInputFormat.inputJobInfo); + } catch (Exception e) { + throw new IOException(e); + } + + return hCatInputFormat; + } + + /** + * Set a filter on the input table. + * @param filter the filter specification, which may be null + * @return this + * @throws IOException on all errors + */ + public HCatInputFormat setFilter(String filter) throws IOException { + // null filters are supported to simplify client code + if (filter != null) { + inputJobInfo = InputJobInfo.create( + inputJobInfo.getDatabaseName(), + inputJobInfo.getTableName(), + filter, + inputJobInfo.getProperties()); + try { + InitializeInput.setInput(conf, inputJobInfo); + } catch (Exception e) { + throw new IOException(e); + } + } + return this; + } + + /** + * Set properties for the input format. 
+ * @param properties properties for the input specification + * @return this + * @throws IOException on all errors + */ + public HCatInputFormat setProperties(Properties properties) throws IOException { + Preconditions.checkNotNull(properties, "required argument 'properties' is null"); + inputJobInfo = InputJobInfo.create( + inputJobInfo.getDatabaseName(), + inputJobInfo.getTableName(), + inputJobInfo.getFilter(), + properties); + try { + InitializeInput.setInput(conf, inputJobInfo); + } catch (Exception e) { + throw new IOException(e); + } + return this; + } +} diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatMapRedUtil.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatMapRedUtil.java new file mode 100644 index 0000000..a808565 --- /dev/null +++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatMapRedUtil.java @@ -0,0 +1,56 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hive.hcatalog.mapreduce; + +import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.util.Progressable; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.JobContext; +import org.apache.hadoop.mapred.Reporter; +import org.apache.hadoop.mapred.TaskAttemptContext; +import org.apache.hadoop.mapred.TaskAttemptID; + +public class HCatMapRedUtil { + + public static TaskAttemptContext createTaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext context) { + return createTaskAttemptContext(new JobConf(context.getConfiguration()), + org.apache.hadoop.mapred.TaskAttemptID.forName(context.getTaskAttemptID().toString()), + Reporter.NULL); + } + + public static org.apache.hadoop.mapreduce.TaskAttemptContext createTaskAttemptContext(Configuration conf, org.apache.hadoop.mapreduce.TaskAttemptID id) { + return ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(conf,id); + } + + public static TaskAttemptContext createTaskAttemptContext(JobConf conf, TaskAttemptID id, Progressable progressable) { + return ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(conf, id, (Reporter) progressable); + } + + public static org.apache.hadoop.mapred.JobContext createJobContext(org.apache.hadoop.mapreduce.JobContext context) { + return createJobContext((JobConf)context.getConfiguration(), + context.getJobID(), + Reporter.NULL); + } + + public static JobContext createJobContext(JobConf conf, org.apache.hadoop.mapreduce.JobID id, Progressable progressable) { + return ShimLoader.getHadoopShims().getHCatShim().createJobContext(conf, id, (Reporter) progressable); + } +} diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatOutputFormat.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatOutputFormat.java new file mode 100644 index 0000000..1a4a8ee --- /dev/null +++ 
hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatOutputFormat.java @@ -0,0 +1,280 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.mapreduce; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.Index; +import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.io.WritableComparable; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.OutputCommitter; +import org.apache.hadoop.mapreduce.RecordWriter; +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.hadoop.security.Credentials; +import org.apache.hive.hcatalog.common.ErrorType; +import org.apache.hive.hcatalog.common.HCatConstants; +import 
org.apache.hive.hcatalog.common.HCatException; +import org.apache.hive.hcatalog.common.HCatUtil; +import org.apache.hive.hcatalog.data.HCatRecord; +import org.apache.hive.hcatalog.data.schema.HCatSchema; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** The OutputFormat to use to write data to HCatalog. The key value is ignored and + * should be given as null. The value is the HCatRecord to write.*/ +public class HCatOutputFormat extends HCatBaseOutputFormat { + + static final private Logger LOG = LoggerFactory.getLogger(HCatOutputFormat.class); + + private static int maxDynamicPartitions; + private static boolean harRequested; + + /** + * @see org.apache.hive.hcatalog.mapreduce.HCatOutputFormat#setOutput(org.apache.hadoop.conf.Configuration, Credentials, OutputJobInfo) + */ + public static void setOutput(Job job, OutputJobInfo outputJobInfo) throws IOException { + setOutput(job.getConfiguration(), job.getCredentials(), outputJobInfo); + } + + /** + * Set the information about the output to write for the job. This queries the metadata server + * to find the StorageHandler to use for the table. It throws an error if the + * partition is already published. 
+ * @param conf the Configuration object + * @param credentials the Credentials object + * @param outputJobInfo the table output information for the job + * @throws IOException the exception in communicating with the metadata server + */ + @SuppressWarnings("unchecked") + public static void setOutput(Configuration conf, Credentials credentials, + OutputJobInfo outputJobInfo) throws IOException { + HiveMetaStoreClient client = null; + + try { + + HiveConf hiveConf = HCatUtil.getHiveConf(conf); + client = HCatUtil.getHiveClient(hiveConf); + Table table = HCatUtil.getTable(client, outputJobInfo.getDatabaseName(), + outputJobInfo.getTableName()); + + List indexList = client.listIndexNames(outputJobInfo.getDatabaseName(), outputJobInfo.getTableName(), Short.MAX_VALUE); + + for (String indexName : indexList) { + Index index = client.getIndex(outputJobInfo.getDatabaseName(), outputJobInfo.getTableName(), indexName); + if (!index.isDeferredRebuild()) { + throw new HCatException(ErrorType.ERROR_NOT_SUPPORTED, "Store into a table with an automatic index from Pig/Mapreduce is not supported"); + } + } + StorageDescriptor sd = table.getTTable().getSd(); + + if (sd.isCompressed()) { + throw new HCatException(ErrorType.ERROR_NOT_SUPPORTED, "Store into a compressed partition from Pig/Mapreduce is not supported"); + } + + if (sd.getBucketCols() != null && !sd.getBucketCols().isEmpty()) { + throw new HCatException(ErrorType.ERROR_NOT_SUPPORTED, "Store into a partition with bucket definition from Pig/Mapreduce is not supported"); + } + + if (sd.getSortCols() != null && !sd.getSortCols().isEmpty()) { + throw new HCatException(ErrorType.ERROR_NOT_SUPPORTED, "Store into a partition with sorted column definition from Pig/Mapreduce is not supported"); + } + + if (table.getTTable().getPartitionKeysSize() == 0) { + if ((outputJobInfo.getPartitionValues() != null) && (!outputJobInfo.getPartitionValues().isEmpty())) { + // attempt made to save partition values in non-partitioned table - throw 
error. + throw new HCatException(ErrorType.ERROR_INVALID_PARTITION_VALUES, + "Partition values specified for non-partitioned table"); + } + // non-partitioned table + outputJobInfo.setPartitionValues(new HashMap()); + + } else { + // partitioned table, we expect partition values + // convert user specified map to have lower case key names + Map valueMap = new HashMap(); + if (outputJobInfo.getPartitionValues() != null) { + for (Map.Entry entry : outputJobInfo.getPartitionValues().entrySet()) { + valueMap.put(entry.getKey().toLowerCase(), entry.getValue()); + } + } + + if ((outputJobInfo.getPartitionValues() == null) + || (outputJobInfo.getPartitionValues().size() < table.getTTable().getPartitionKeysSize())) { + // dynamic partition usecase - partition values were null, or not all were specified + // need to figure out which keys are not specified. + List dynamicPartitioningKeys = new ArrayList(); + boolean firstItem = true; + for (FieldSchema fs : table.getPartitionKeys()) { + if (!valueMap.containsKey(fs.getName().toLowerCase())) { + dynamicPartitioningKeys.add(fs.getName().toLowerCase()); + } + } + + if (valueMap.size() + dynamicPartitioningKeys.size() != table.getTTable().getPartitionKeysSize()) { + // If this isn't equal, then bogus key values have been inserted, error out. 
+ throw new HCatException(ErrorType.ERROR_INVALID_PARTITION_VALUES, "Invalid partition keys specified"); + } + + outputJobInfo.setDynamicPartitioningKeys(dynamicPartitioningKeys); + String dynHash; + if ((dynHash = conf.get(HCatConstants.HCAT_DYNAMIC_PTN_JOBID)) == null) { + dynHash = String.valueOf(Math.random()); +// LOG.info("New dynHash : ["+dynHash+"]"); +// }else{ +// LOG.info("Old dynHash : ["+dynHash+"]"); + } + conf.set(HCatConstants.HCAT_DYNAMIC_PTN_JOBID, dynHash); + + } + + outputJobInfo.setPartitionValues(valueMap); + } + + // To get around hbase failure on single node, see BUG-4383 + conf.set("dfs.client.read.shortcircuit", "false"); + HCatSchema tableSchema = HCatUtil.extractSchema(table); + StorerInfo storerInfo = + InternalUtil.extractStorerInfo(table.getTTable().getSd(), table.getParameters()); + + List partitionCols = new ArrayList(); + for (FieldSchema schema : table.getPartitionKeys()) { + partitionCols.add(schema.getName()); + } + + HCatStorageHandler storageHandler = HCatUtil.getStorageHandler(conf, storerInfo); + + //Serialize the output info into the configuration + outputJobInfo.setTableInfo(HCatTableInfo.valueOf(table.getTTable())); + outputJobInfo.setOutputSchema(tableSchema); + harRequested = getHarRequested(hiveConf); + outputJobInfo.setHarRequested(harRequested); + maxDynamicPartitions = getMaxDynamicPartitions(hiveConf); + outputJobInfo.setMaximumDynamicPartitions(maxDynamicPartitions); + + HCatUtil.configureOutputStorageHandler(storageHandler, conf, outputJobInfo); + + Path tblPath = new Path(table.getTTable().getSd().getLocation()); + + /* Set the umask in conf such that files/dirs get created with table-dir + * permissions. Following three assumptions are made: + * 1. Actual files/dirs creation is done by RecordWriter of underlying + * output format. It is assumed that they use default permissions while creation. + * 2. Default Permissions = FsPermission.getDefault() = 777. + * 3. UMask is honored by underlying filesystem. 
+ */ + + FsPermission.setUMask(conf, FsPermission.getDefault().applyUMask( + tblPath.getFileSystem(conf).getFileStatus(tblPath).getPermission())); + + if (Security.getInstance().isSecurityEnabled()) { + Security.getInstance().handleSecurity(credentials, outputJobInfo, client, conf, harRequested); + } + } catch (Exception e) { + if (e instanceof HCatException) { + throw (HCatException) e; + } else { + throw new HCatException(ErrorType.ERROR_SET_OUTPUT, e); + } + } finally { + HCatUtil.closeHiveClientQuietly(client); + } + } + + /** + * @see org.apache.hive.hcatalog.mapreduce.HCatOutputFormat#setSchema(org.apache.hadoop.conf.Configuration, org.apache.hive.hcatalog.data.schema.HCatSchema) + */ + public static void setSchema(final Job job, final HCatSchema schema) throws IOException { + setSchema(job.getConfiguration(), schema); + } + + /** + * Set the schema for the data being written out to the partition. The + * table schema is used by default for the partition if this is not called. + * @param conf the job Configuration object + * @param schema the schema for the data + * @throws IOException + */ + public static void setSchema(final Configuration conf, final HCatSchema schema) throws IOException { + OutputJobInfo jobInfo = getJobInfo(conf); + Map partMap = jobInfo.getPartitionValues(); + setPartDetails(jobInfo, schema, partMap); + conf.set(HCatConstants.HCAT_KEY_OUTPUT_INFO, HCatUtil.serialize(jobInfo)); + } + + /** + * Get the record writer for the job. This uses the StorageHandler's default + * OutputFormat to get the record writer. 
+ * @param context the information about the current task + * @return a RecordWriter to write the output for the job + * @throws IOException + * @throws InterruptedException + */ + @Override + public RecordWriter, HCatRecord> + getRecordWriter(TaskAttemptContext context) + throws IOException, InterruptedException { + return getOutputFormat(context).getRecordWriter(context); + } + + + /** + * Get the output committer for this output format. This is responsible + * for ensuring the output is committed correctly. + * @param context the task context + * @return an output committer + * @throws IOException + * @throws InterruptedException + */ + @Override + public OutputCommitter getOutputCommitter(TaskAttemptContext context + ) throws IOException, InterruptedException { + return getOutputFormat(context).getOutputCommitter(context); + } + + private static int getMaxDynamicPartitions(HiveConf hConf) { + // by default the bounds checking for maximum number of + // dynamic partitions is disabled (-1) + int maxDynamicPartitions = -1; + + if (HCatConstants.HCAT_IS_DYNAMIC_MAX_PTN_CHECK_ENABLED) { + maxDynamicPartitions = hConf.getIntVar( + HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTS); + } + + return maxDynamicPartitions; + } + + private static boolean getHarRequested(HiveConf hConf) { + return hConf.getBoolVar(HiveConf.ConfVars.HIVEARCHIVEENABLED); + } + +} diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatRecordReader.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatRecordReader.java new file mode 100644 index 0000000..8637110 --- /dev/null +++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatRecordReader.java @@ -0,0 +1,285 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.mapreduce; + +import java.io.IOException; +import java.util.Map; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.serde2.Deserializer; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.io.WritableComparable; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapreduce.RecordReader; +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.hadoop.util.ReflectionUtils; +import org.apache.hive.hcatalog.common.HCatConstants; +import org.apache.hive.hcatalog.common.HCatUtil; +import org.apache.hive.hcatalog.data.DefaultHCatRecord; +import org.apache.hive.hcatalog.data.HCatRecord; +import org.apache.hive.hcatalog.data.LazyHCatRecord; +import org.apache.hive.hcatalog.data.schema.HCatSchema; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** The HCat wrapper for the underlying RecordReader, + * this ensures that the initialize on + * the underlying record reader is done with the underlying split, + * not with HCatSplit. 
+ */ +class HCatRecordReader extends RecordReader { + + private static final Logger LOG = LoggerFactory.getLogger(HCatRecordReader.class); + + private InputErrorTracker errorTracker; + + WritableComparable currentKey; + Writable currentValue; + HCatRecord currentHCatRecord; + + /** The underlying record reader to delegate to. */ + private org.apache.hadoop.mapred.RecordReader baseRecordReader; + + /** The storage handler used */ + private final HCatStorageHandler storageHandler; + + private Deserializer deserializer; + + private Map valuesNotInDataCols; + + private HCatSchema outputSchema = null; + private HCatSchema dataSchema = null; + + /** + * Instantiates a new hcat record reader. + */ + public HCatRecordReader(HCatStorageHandler storageHandler, + Map valuesNotInDataCols) { + this.storageHandler = storageHandler; + this.valuesNotInDataCols = valuesNotInDataCols; + } + + /* (non-Javadoc) + * @see org.apache.hadoop.mapreduce.RecordReader#initialize( + * org.apache.hadoop.mapreduce.InputSplit, + * org.apache.hadoop.mapreduce.TaskAttemptContext) + */ + @Override + public void initialize(org.apache.hadoop.mapreduce.InputSplit split, + TaskAttemptContext taskContext) throws IOException, InterruptedException { + + HCatSplit hcatSplit = InternalUtil.castToHCatSplit(split); + + baseRecordReader = createBaseRecordReader(hcatSplit, storageHandler, taskContext); + createDeserializer(hcatSplit, storageHandler, taskContext); + + // Pull the output schema out of the TaskAttemptContext + outputSchema = (HCatSchema) HCatUtil.deserialize( + taskContext.getConfiguration().get(HCatConstants.HCAT_KEY_OUTPUT_SCHEMA)); + + if (outputSchema == null) { + outputSchema = hcatSplit.getTableSchema(); + } + + // Pull the table schema out of the Split info + // TODO This should be passed in the TaskAttemptContext instead + dataSchema = hcatSplit.getDataSchema(); + + errorTracker = new InputErrorTracker(taskContext.getConfiguration()); + } + + private org.apache.hadoop.mapred.RecordReader 
createBaseRecordReader(HCatSplit hcatSplit, + HCatStorageHandler storageHandler, TaskAttemptContext taskContext) throws IOException { + + JobConf jobConf = HCatUtil.getJobConfFromContext(taskContext); + HCatUtil.copyJobPropertiesToJobConf(hcatSplit.getPartitionInfo().getJobProperties(), jobConf); + org.apache.hadoop.mapred.InputFormat inputFormat = + HCatInputFormat.getMapRedInputFormat(jobConf, storageHandler.getInputFormatClass()); + return inputFormat.getRecordReader(hcatSplit.getBaseSplit(), jobConf, + InternalUtil.createReporter(taskContext)); + } + + private void createDeserializer(HCatSplit hcatSplit, HCatStorageHandler storageHandler, + TaskAttemptContext taskContext) throws IOException { + + deserializer = ReflectionUtils.newInstance(storageHandler.getSerDeClass(), + taskContext.getConfiguration()); + + try { + InternalUtil.initializeDeserializer(deserializer, storageHandler.getConf(), + hcatSplit.getPartitionInfo().getTableInfo(), + hcatSplit.getPartitionInfo().getPartitionSchema()); + } catch (SerDeException e) { + throw new IOException("Failed initializing deserializer " + + storageHandler.getSerDeClass().getName(), e); + } + } + + /* (non-Javadoc) + * @see org.apache.hadoop.mapreduce.RecordReader#getCurrentKey() + */ + @Override + public WritableComparable getCurrentKey() + throws IOException, InterruptedException { + return currentKey; + } + + /* (non-Javadoc) + * @see org.apache.hadoop.mapreduce.RecordReader#getCurrentValue() + */ + @Override + public HCatRecord getCurrentValue() throws IOException, InterruptedException { + return currentHCatRecord; + } + + /* (non-Javadoc) + * @see org.apache.hadoop.mapreduce.RecordReader#getProgress() + */ + @Override + public float getProgress() { + try { + return baseRecordReader.getProgress(); + } catch (IOException e) { + LOG.warn("Exception in HCatRecord reader", e); + } + return 0.0f; // errored + } + + /** + * Check if the wrapped RecordReader has another record, and if so convert it into an + * HCatRecord. 
We both check for records and convert here so a configurable percent of + * bad records can be tolerated. + * + * @return true if there is a next record + * @throws IOException on error + * @throws InterruptedException on error + */ + @Override + public boolean nextKeyValue() throws IOException, InterruptedException { + if (currentKey == null) { + currentKey = baseRecordReader.createKey(); + currentValue = baseRecordReader.createValue(); + } + + while (baseRecordReader.next(currentKey, currentValue)) { + HCatRecord r = null; + Throwable t = null; + + errorTracker.incRecords(); + + try { + Object o = deserializer.deserialize(currentValue); + r = new LazyHCatRecord(o, deserializer.getObjectInspector()); + } catch (Throwable throwable) { + t = throwable; + } + + if (r == null) { + errorTracker.incErrors(t); + continue; + } + + DefaultHCatRecord dr = new DefaultHCatRecord(outputSchema.size()); + int i = 0; + for (String fieldName : outputSchema.getFieldNames()) { + if (dataSchema.getPosition(fieldName) != null) { + dr.set(i, r.get(fieldName, dataSchema)); + } else { + dr.set(i, valuesNotInDataCols.get(fieldName)); + } + i++; + } + + currentHCatRecord = dr; + return true; + } + + return false; + } + + /* (non-Javadoc) + * @see org.apache.hadoop.mapreduce.RecordReader#close() + */ + @Override + public void close() throws IOException { + baseRecordReader.close(); + } + + /** + * Tracks the number of errors in input and throws a RuntimeException + * if the rate of errors crosses a limit. + *
+ * The intention is to skip over very rare file corruption or incorrect + * input, but catch programmer errors (incorrect format, or incorrect + * deserializers etc). + * + * This class was largely copied from Elephant-Bird (thanks @rangadi!) + * https://github.com/kevinweil/elephant-bird/blob/master/core/src/main/java/com/twitter/elephantbird/mapreduce/input/LzoRecordReader.java + */ + static class InputErrorTracker { + long numRecords; + long numErrors; + + double errorThreshold; // max fraction of errors allowed + long minErrors; // throw error only after this many errors + + InputErrorTracker(Configuration conf) { + errorThreshold = conf.getFloat(HCatConstants.HCAT_INPUT_BAD_RECORD_THRESHOLD_KEY, + HCatConstants.HCAT_INPUT_BAD_RECORD_THRESHOLD_DEFAULT); + minErrors = conf.getLong(HCatConstants.HCAT_INPUT_BAD_RECORD_MIN_KEY, + HCatConstants.HCAT_INPUT_BAD_RECORD_MIN_DEFAULT); + numRecords = 0; + numErrors = 0; + } + + void incRecords() { + numRecords++; + } + + void incErrors(Throwable cause) { + numErrors++; + if (numErrors > numRecords) { + // incorrect use of this class + throw new RuntimeException("Forgot to invoke incRecords()?"); + } + + if (cause == null) { + cause = new Exception("Unknown error"); + } + + if (errorThreshold <= 0) { // no errors are tolerated + throw new RuntimeException("error while reading input records", cause); + } + + LOG.warn("Error while reading an input record (" + + numErrors + " out of " + numRecords + " so far ): ", cause); + + double errRate = numErrors / (double) numRecords; + + // will always excuse the first error. We can decide if single + // error crosses threshold inside close() if we want to. 
+ if (numErrors >= minErrors && errRate > errorThreshold) { + LOG.error(numErrors + " out of " + numRecords + + " crosses configured threshold (" + errorThreshold + ")"); + throw new RuntimeException("error rate while reading input records crossed threshold", cause); + } + } + } +} diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatSplit.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatSplit.java new file mode 100644 index 0000000..6cbe268 --- /dev/null +++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatSplit.java @@ -0,0 +1,186 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.hive.hcatalog.mapreduce; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.lang.reflect.Constructor; + +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.io.WritableUtils; +import org.apache.hadoop.mapreduce.InputSplit; +import org.apache.hive.hcatalog.common.HCatUtil; +import org.apache.hive.hcatalog.data.schema.HCatSchema; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** The HCatSplit wrapper around the InputSplit returned by the underlying InputFormat */ +public class HCatSplit extends InputSplit + implements Writable, org.apache.hadoop.mapred.InputSplit { + + private static final Logger LOG = LoggerFactory.getLogger(HCatSplit.class); + /** The partition info for the split. */ + private PartInfo partitionInfo; + + /** The split returned by the underlying InputFormat split. */ + private org.apache.hadoop.mapred.InputSplit baseMapRedSplit; + + /** The schema for the HCatTable */ + private HCatSchema tableSchema; + + private HiveConf hiveConf; + + /** + * Instantiates a new hcat split. + */ + public HCatSplit() { + } + + /** + * Instantiates a new hcat split. + * + * @param partitionInfo the partition info + * @param baseMapRedSplit the base mapred split + * @param tableSchema the table level schema + */ + public HCatSplit(PartInfo partitionInfo, + org.apache.hadoop.mapred.InputSplit baseMapRedSplit, + HCatSchema tableSchema) { + + this.partitionInfo = partitionInfo; + // dataSchema can be obtained from partitionInfo.getPartitionSchema() + this.baseMapRedSplit = baseMapRedSplit; + this.tableSchema = tableSchema; + } + + /** + * Gets the partition info. + * @return the partitionInfo + */ + public PartInfo getPartitionInfo() { + return partitionInfo; + } + + /** + * Gets the underlying InputSplit. 
+ * @return the baseMapRedSplit + */ + public org.apache.hadoop.mapred.InputSplit getBaseSplit() { + return baseMapRedSplit; + } + + /** + * Gets the data schema. + * @return the partition schema held by the partition info + */ + public HCatSchema getDataSchema() { + return this.partitionInfo.getPartitionSchema(); + } + + /** + * Gets the table schema. + * @return the table schema + */ + public HCatSchema getTableSchema() { + return this.tableSchema; + } + + /* (non-Javadoc) + * @see org.apache.hadoop.mapreduce.InputSplit#getLength() + */ + @Override + public long getLength() { + try { + return baseMapRedSplit.getLength(); + } catch (IOException e) { + LOG.warn("Exception in HCatSplit", e); + } + return 0; // we errored + } + + /* (non-Javadoc) + * @see org.apache.hadoop.mapreduce.InputSplit#getLocations() + */ + @Override + public String[] getLocations() { + try { + return baseMapRedSplit.getLocations(); + } catch (IOException e) { + LOG.warn("Exception in HCatSplit", e); + } + return new String[0]; // we errored + } + + /* (non-Javadoc) + * @see org.apache.hadoop.io.Writable#readFields(java.io.DataInput) + */ + @SuppressWarnings("unchecked") + @Override + public void readFields(DataInput input) throws IOException { + String partitionInfoString = WritableUtils.readString(input); + partitionInfo = (PartInfo) HCatUtil.deserialize(partitionInfoString); + + String baseSplitClassName = WritableUtils.readString(input); + org.apache.hadoop.mapred.InputSplit split; + try { + Class splitClass = + (Class) Class.forName(baseSplitClassName); + + //Class.forName().newInstance() does not work if the underlying + //InputSplit has package visibility + Constructor + constructor = + splitClass.getDeclaredConstructor(new Class[]{}); + constructor.setAccessible(true); + + split = constructor.newInstance(); + // read baseSplit from input + ((Writable) split).readFields(input); + this.baseMapRedSplit = split; + } catch (Exception e) { + throw new IOException("Exception from " + baseSplitClassName, e); + } + + String
tableSchemaString = WritableUtils.readString(input); + tableSchema = (HCatSchema) HCatUtil.deserialize(tableSchemaString); + } + + /* (non-Javadoc) + * @see org.apache.hadoop.io.Writable#write(java.io.DataOutput) + */ + @Override + public void write(DataOutput output) throws IOException { + String partitionInfoString = HCatUtil.serialize(partitionInfo); + + // write partitionInfo into output + WritableUtils.writeString(output, partitionInfoString); + + WritableUtils.writeString(output, baseMapRedSplit.getClass().getName()); + Writable baseSplitWritable = (Writable) baseMapRedSplit; + //write baseSplit into output + baseSplitWritable.write(output); + + //write the table schema into output + String tableSchemaString = HCatUtil.serialize(tableSchema); + WritableUtils.writeString(output, tableSchemaString); + } + +} diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatStorageHandler.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatStorageHandler.java new file mode 100644 index 0000000..e45149d --- /dev/null +++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatStorageHandler.java @@ -0,0 +1,120 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hive.hcatalog.mapreduce; + +import java.util.Map; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.metadata.DefaultStorageHandler; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.TableDesc; +import org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider; +import org.apache.hadoop.mapred.OutputFormat; + +/** + * The abstract class HCatStorageHandler serves as the base class for all + * the storage handlers required for non-native tables in HCatalog. + */ +public abstract class HCatStorageHandler extends DefaultStorageHandler { + + //TODO move this to HiveStorageHandler + + /** + * This method is called to allow the StorageHandlers the chance + * to populate the JobContext.getConfiguration() with properties that + * may be needed by the handler's bundled artifacts (i.e. InputFormat, SerDe, etc). + * Key value pairs passed into jobProperties are guaranteed to be set in the job's + * configuration object. Users can retrieve "context" information from tableDesc. + * Users should avoid mutating tableDesc and only make changes in jobProperties. + * This method is expected to be idempotent such that a job called with the + * same tableDesc values should return the same key-value pairs in jobProperties. + * Any external state set by this method should remain the same if this method is + * called again. It is up to the user to determine how best to guarantee this invariant. + * + * This method in particular is to create a configuration for input.
+ * @param tableDesc source of "context" information; should not be mutated + * @param jobProperties key-value pairs to be set in the job's configuration + */ + public abstract void configureInputJobProperties(TableDesc tableDesc, Map jobProperties); + + //TODO move this to HiveStorageHandler + + /** + * This method is called to allow the StorageHandlers the chance + * to populate the JobContext.getConfiguration() with properties that + * may be needed by the handler's bundled artifacts (i.e. InputFormat, SerDe, etc). + * Key value pairs passed into jobProperties are guaranteed to be set in the job's + * configuration object. Users can retrieve "context" information from tableDesc. + * Users should avoid mutating tableDesc and only make changes in jobProperties. + * This method is expected to be idempotent such that a job called with the + * same tableDesc values should return the same key-value pairs in jobProperties. + * Any external state set by this method should remain the same if this method is + * called again. It is up to the user to determine how best to guarantee this invariant. + * + * This method in particular is to create a configuration for output. + * @param tableDesc source of "context" information; should not be mutated + * @param jobProperties key-value pairs to be set in the job's configuration + */ + public abstract void configureOutputJobProperties(TableDesc tableDesc, Map jobProperties); + + /** + * Gets the authorization provider for this storage handler. + * + * @return authorization provider + * @throws HiveException + */ + public abstract HiveAuthorizationProvider getAuthorizationProvider() + throws HiveException; + + /* + * (non-Javadoc) + * + * @see org.apache.hadoop.hive.ql.metadata.HiveStorageHandler# + * configureTableJobProperties(org.apache.hadoop.hive.ql.plan.TableDesc, + * java.util.Map) + */ + @Override + @Deprecated + public final void configureTableJobProperties(TableDesc tableDesc, + Map jobProperties) { + } + + /* + * (non-Javadoc) + * + * @see org.apache.hadoop.conf.Configurable#getConf() + */ + @Override + public abstract Configuration getConf(); + + /* + * (non-Javadoc) + * + * @see org.apache.hadoop.conf.Configurable#setConf(org.apache.hadoop.conf.
+ * Configuration) + */ + @Override + public abstract void setConf(Configuration conf); + + OutputFormatContainer getOutputFormatContainer(OutputFormat outputFormat) { + return new DefaultOutputFormatContainer(outputFormat); + } + +} diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatTableInfo.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatTableInfo.java new file mode 100644 index 0000000..783d41a --- /dev/null +++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/HCatTableInfo.java @@ -0,0 +1,187 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hive.hcatalog.mapreduce; + +import java.io.IOException; +import java.io.Serializable; + +import org.apache.hadoop.hive.metastore.MetaStoreUtils; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hive.hcatalog.common.HCatUtil; +import org.apache.hive.hcatalog.data.schema.HCatSchema; + +/** + * + * HCatTableInfo - class to communicate table information to {@link HCatInputFormat} + * and {@link HCatOutputFormat} + * + */ +public class HCatTableInfo implements Serializable { + + + private static final long serialVersionUID = 1L; + + /** The db and table names */ + private final String databaseName; + private final String tableName; + + /** The table schema. */ + private final HCatSchema dataColumns; + private final HCatSchema partitionColumns; + + /** The table being written to */ + private final Table table; + + /** The storer info */ + private StorerInfo storerInfo; + + /** + * Initializes a new HCatTableInfo instance to be used with {@link HCatInputFormat} + * for reading data from a table. + * work with hadoop security, the kerberos principal name of the server - else null + * The principal name should be of the form: + * /_HOST@ like "hcat/_HOST@myrealm.com" + * The special string _HOST will be replaced automatically with the correct host name + * @param databaseName the db name + * @param tableName the table name + * @param dataColumns schema of columns which contain data + * @param partitionColumns schema of partition columns + * @param storerInfo information about storage descriptor + * @param table hive metastore table class + */ + HCatTableInfo( + String databaseName, + String tableName, + HCatSchema dataColumns, + HCatSchema partitionColumns, + StorerInfo storerInfo, + Table table) { + this.databaseName = (databaseName == null) ? 
MetaStoreUtils.DEFAULT_DATABASE_NAME : databaseName; + this.tableName = tableName; + this.dataColumns = dataColumns; + this.table = table; + this.storerInfo = storerInfo; + this.partitionColumns = partitionColumns; + } + + /** + * Gets the value of databaseName + * @return the databaseName + */ + public String getDatabaseName() { + return databaseName; + } + + /** + * Gets the value of tableName + * @return the tableName + */ + public String getTableName() { + return tableName; + } + + /** + * @return return schema of data columns as defined in meta store + */ + public HCatSchema getDataColumns() { + return dataColumns; + } + + /** + * @return schema of partition columns + */ + public HCatSchema getPartitionColumns() { + return partitionColumns; + } + + /** + * @return the storerInfo + */ + public StorerInfo getStorerInfo() { + return storerInfo; + } + + public String getTableLocation() { + return table.getSd().getLocation(); + } + + /** + * minimize dependency on hive classes so this is package private + * this should eventually no longer be used + * @return hive metastore representation of table + */ + Table getTable() { + return table; + } + + /** + * create an HCatTableInfo instance from the supplied Hive Table instance + * @param table to create an instance from + * @return HCatTableInfo + * @throws IOException + */ + static HCatTableInfo valueOf(Table table) throws IOException { + // Explicitly use {@link org.apache.hadoop.hive.ql.metadata.Table} when getting the schema, + // but store @{link org.apache.hadoop.hive.metastore.api.Table} as this class is serialized + // into the job conf. 
+ org.apache.hadoop.hive.ql.metadata.Table mTable = + new org.apache.hadoop.hive.ql.metadata.Table(table); + HCatSchema schema = HCatUtil.extractSchema(mTable); + StorerInfo storerInfo = + InternalUtil.extractStorerInfo(table.getSd(), table.getParameters()); + HCatSchema partitionColumns = HCatUtil.getPartitionColumns(mTable); + return new HCatTableInfo(table.getDbName(), table.getTableName(), schema, + partitionColumns, storerInfo, table); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + HCatTableInfo tableInfo = (HCatTableInfo) o; + + if (dataColumns != null ? !dataColumns.equals(tableInfo.dataColumns) : tableInfo.dataColumns != null) + return false; + if (databaseName != null ? !databaseName.equals(tableInfo.databaseName) : tableInfo.databaseName != null) + return false; + if (partitionColumns != null ? !partitionColumns.equals(tableInfo.partitionColumns) : tableInfo.partitionColumns != null) + return false; + if (storerInfo != null ? !storerInfo.equals(tableInfo.storerInfo) : tableInfo.storerInfo != null) return false; + if (table != null ? !table.equals(tableInfo.table) : tableInfo.table != null) return false; + if (tableName != null ? !tableName.equals(tableInfo.tableName) : tableInfo.tableName != null) return false; + + return true; + } + + + @Override + public int hashCode() { + int result = databaseName != null ? databaseName.hashCode() : 0; + result = 31 * result + (tableName != null ? tableName.hashCode() : 0); + result = 31 * result + (dataColumns != null ? dataColumns.hashCode() : 0); + result = 31 * result + (partitionColumns != null ? partitionColumns.hashCode() : 0); + result = 31 * result + (table != null ? table.hashCode() : 0); + result = 31 * result + (storerInfo != null ? 
storerInfo.hashCode() : 0); + return result; + } + +} + diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/InitializeInput.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/InitializeInput.java new file mode 100644 index 0000000..fae5573 --- /dev/null +++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/InitializeInput.java @@ -0,0 +1,171 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.hive.hcatalog.mapreduce; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Properties; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; +import org.apache.hadoop.hive.metastore.api.Partition; +import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hive.hcatalog.common.ErrorType; +import org.apache.hive.hcatalog.common.HCatConstants; +import org.apache.hive.hcatalog.common.HCatException; +import org.apache.hive.hcatalog.common.HCatUtil; +import org.apache.hive.hcatalog.data.schema.HCatSchema; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * The Class which handles querying the metadata server using the MetaStoreClient. The list of + * partitions matching the partition filter is fetched from the server and the information is + * serialized and written into the JobContext configuration. The inputInfo is also updated with + * info required in the client process context. + */ +class InitializeInput { + + private static final Logger LOG = LoggerFactory.getLogger(InitializeInput.class); + + /** + * @see org.apache.hive.hcatalog.mapreduce.InitializeInput#setInput(org.apache.hadoop.conf.Configuration, InputJobInfo) + */ + public static void setInput(Job job, InputJobInfo theirInputJobInfo) throws Exception { + setInput(job.getConfiguration(), theirInputJobInfo); + } + + /** + * Set the input to use for the Job. This queries the metadata server with the specified + * partition predicates, gets the matching partitions, and puts the information in the job + * configuration object. + * + * To ensure a known InputJobInfo state, only the database name, table name, filter, and + * properties are preserved. 
All other modifications from the given InputJobInfo are discarded. + * + * After calling setInput, InputJobInfo can be retrieved from the job configuration as follows: + * {code} + * InputJobInfo inputInfo = (InputJobInfo) HCatUtil.deserialize( + * job.getConfiguration().get(HCatConstants.HCAT_KEY_JOB_INFO)); + * {code} + * + * @param conf the job Configuration object + * @param theirInputJobInfo information on the Input to read + * @throws Exception + */ + public static void setInput(Configuration conf, + InputJobInfo theirInputJobInfo) throws Exception { + InputJobInfo inputJobInfo = InputJobInfo.create( + theirInputJobInfo.getDatabaseName(), + theirInputJobInfo.getTableName(), + theirInputJobInfo.getFilter(), + theirInputJobInfo.getProperties()); + conf.set( + HCatConstants.HCAT_KEY_JOB_INFO, + HCatUtil.serialize(getInputJobInfo(conf, inputJobInfo, null))); + } + + /** + * Returns the given InputJobInfo after populating with data queried from the metadata service. + */ + private static InputJobInfo getInputJobInfo( + Configuration conf, InputJobInfo inputJobInfo, String locationFilter) throws Exception { + HiveMetaStoreClient client = null; + HiveConf hiveConf = null; + try { + if (conf != null) { + hiveConf = HCatUtil.getHiveConf(conf); + } else { + hiveConf = new HiveConf(HCatInputFormat.class); + } + client = HCatUtil.getHiveClient(hiveConf); + Table table = HCatUtil.getTable(client, inputJobInfo.getDatabaseName(), + inputJobInfo.getTableName()); + + List partInfoList = new ArrayList(); + + inputJobInfo.setTableInfo(HCatTableInfo.valueOf(table.getTTable())); + if (table.getPartitionKeys().size() != 0) { + //Partitioned table + List parts = client.listPartitionsByFilter(inputJobInfo.getDatabaseName(), + inputJobInfo.getTableName(), + inputJobInfo.getFilter(), + (short) -1); + + // Default to 100,000 partitions if hcat.metastore.maxpartitions is not defined + int maxPart = hiveConf.getInt("hcat.metastore.maxpartitions", 100000); + if (parts != null &&
parts.size() > maxPart) { + throw new HCatException(ErrorType.ERROR_EXCEED_MAXPART, "total number of partitions is " + parts.size()); + } + + // populate partition info + for (Partition ptn : parts) { + HCatSchema schema = HCatUtil.extractSchema( + new org.apache.hadoop.hive.ql.metadata.Partition(table, ptn)); + PartInfo partInfo = extractPartInfo(schema, ptn.getSd(), + ptn.getParameters(), conf, inputJobInfo); + partInfo.setPartitionValues(InternalUtil.createPtnKeyValueMap(table, ptn)); + partInfoList.add(partInfo); + } + + } else { + //Non partitioned table + HCatSchema schema = HCatUtil.extractSchema(table); + PartInfo partInfo = extractPartInfo(schema, table.getTTable().getSd(), + table.getParameters(), conf, inputJobInfo); + partInfo.setPartitionValues(new HashMap()); + partInfoList.add(partInfo); + } + inputJobInfo.setPartitions(partInfoList); + + return inputJobInfo; + } finally { + HCatUtil.closeHiveClientQuietly(client); + } + + } + + private static PartInfo extractPartInfo(HCatSchema schema, StorageDescriptor sd, + Map parameters, Configuration conf, + InputJobInfo inputJobInfo) throws IOException { + + StorerInfo storerInfo = InternalUtil.extractStorerInfo(sd, parameters); + + Properties hcatProperties = new Properties(); + HCatStorageHandler storageHandler = HCatUtil.getStorageHandler(conf, storerInfo); + + // copy the properties from storageHandler to jobProperties + Map jobProperties = HCatUtil.getInputJobProperties(storageHandler, inputJobInfo); + + for (String key : parameters.keySet()) { + hcatProperties.put(key, parameters.get(key)); + } + // FIXME + // Bloating partinfo with inputJobInfo is not good + return new PartInfo(schema, storageHandler, sd.getLocation(), + hcatProperties, jobProperties, inputJobInfo.getTableInfo()); + } + +} diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/InputJobInfo.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/InputJobInfo.java new file mode 100644 index 0000000..9478f31 
--- /dev/null +++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/InputJobInfo.java @@ -0,0 +1,201 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.mapreduce; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hive.metastore.MetaStoreUtils; + +import java.io.IOException; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; +import java.io.Serializable; +import java.util.List; +import java.util.Properties; +import java.util.zip.Deflater; +import java.util.zip.DeflaterOutputStream; +import java.util.zip.InflaterInputStream; + +/** + * Container for metadata read from the metadata server. + * Prior to release 0.5, InputJobInfo was a key part of the public API, exposed directly + * to end-users as an argument to + * {@link HCatInputFormat#setInput(org.apache.hadoop.mapreduce.Job, InputJobInfo)}. + * Going forward, we plan on treating InputJobInfo as an implementation detail and no longer + * expose to end-users. 
Should you have a need to use InputJobInfo outside HCatalog itself, + * please contact the developer mailing list before depending on this class. + */ +@InterfaceAudience.Private +@InterfaceStability.Evolving +public class InputJobInfo implements Serializable { + + /** The serialization version */ + private static final long serialVersionUID = 1L; + + /** The db and table names. */ + private final String databaseName; + private final String tableName; + + /** meta information of the table to be read from */ + private HCatTableInfo tableInfo; + + /** The partition filter */ + private String filter; + + /** The list of partitions matching the filter. */ + transient private List partitions; + + /** implementation specific job properties */ + private Properties properties; + + /** + * Initializes a new InputJobInfo + * for reading data from a table. + * @param databaseName the db name + * @param tableName the table name + * @param filter the partition filter + * @param properties implementation specific job properties + */ + public static InputJobInfo create(String databaseName, + String tableName, + String filter, + Properties properties) { + return new InputJobInfo(databaseName, tableName, filter, properties); + } + + /** + * Initializes a new InputJobInfo + * for reading data from a table. + * @param databaseName the db name + * @param tableName the table name + * @param filter the partition filter + */ + @Deprecated + public static InputJobInfo create(String databaseName, + String tableName, + String filter) { + return create(databaseName, tableName, filter, null); + } + + + private InputJobInfo(String databaseName, + String tableName, + String filter, + Properties properties) { + this.databaseName = (databaseName == null) ? + MetaStoreUtils.DEFAULT_DATABASE_NAME : databaseName; + this.tableName = tableName; + this.filter = filter; + this.properties = properties == null ? 
new Properties() : properties; + } + + /** + * Gets the value of databaseName + * @return the databaseName + */ + public String getDatabaseName() { + return databaseName; + } + + /** + * Gets the value of tableName + * @return the tableName + */ + public String getTableName() { + return tableName; + } + + /** + * Gets the table's meta information + * @return the HCatTableInfo + */ + public HCatTableInfo getTableInfo() { + return tableInfo; + } + + /** + * Sets the tableInfo instance; + * this should be the same instance + * determined by this object's DatabaseName and TableName + * @param tableInfo the table meta information to store + */ + void setTableInfo(HCatTableInfo tableInfo) { + this.tableInfo = tableInfo; + } + + /** + * Gets the value of partition filter + * @return the filter string + */ + public String getFilter() { + return filter; + } + + /** + * @return partition info + */ + public List getPartitions() { + return partitions; + } + + /** + * @param partitions partition info list + */ + void setPartitions(List partitions) { + this.partitions = partitions; + } + + /** + * Set/Get Property information to be passed down to *StorageHandler implementation + * put implementation specific storage handler configurations here + * @return the implementation specific job properties + */ + public Properties getProperties() { + return properties; + } + + /** + * Serialize this object, compressing the partitions which can exceed the + * allowed jobConf size. + * @see HCATALOG-453 + */ + private void writeObject(ObjectOutputStream oos) + throws IOException { + oos.defaultWriteObject(); + Deflater def = new Deflater(Deflater.BEST_COMPRESSION); + ObjectOutputStream partInfoWriter = + new ObjectOutputStream(new DeflaterOutputStream(oos, def)); + partInfoWriter.writeObject(partitions); + partInfoWriter.close(); + } + + /** + * Deserialize this object, decompressing the partitions which can exceed the + * allowed jobConf size.
+ * @see HCATALOG-453 + */ + @SuppressWarnings("unchecked") + private void readObject(ObjectInputStream ois) + throws IOException, ClassNotFoundException { + ois.defaultReadObject(); + ObjectInputStream partInfoReader = + new ObjectInputStream(new InflaterInputStream(ois)); + partitions = (List)partInfoReader.readObject(); + } +} diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/InternalUtil.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/InternalUtil.java new file mode 100644 index 0000000..996b8f4 --- /dev/null +++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/InternalUtil.java @@ -0,0 +1,218 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hive.hcatalog.mapreduce; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.metastore.MetaStoreUtils; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.Partition; +import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.serde2.Deserializer; +import org.apache.hadoop.hive.serde2.SerDe; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.mapred.Reporter; +import org.apache.hadoop.mapreduce.InputSplit; +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.hive.hcatalog.common.HCatUtil; +import org.apache.hive.hcatalog.data.schema.HCatFieldSchema; +import org.apache.hive.hcatalog.data.schema.HCatSchema; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Properties; + +class InternalUtil { + private static final Logger LOG = 
LoggerFactory.getLogger(InternalUtil.class); + + static StorerInfo extractStorerInfo(StorageDescriptor sd, Map properties) throws IOException { + Properties hcatProperties = new Properties(); + for (String key : properties.keySet()) { + hcatProperties.put(key, properties.get(key)); + } + + // also populate with StorageDescriptor->SerDe.Parameters + for (Map.Entry param : + sd.getSerdeInfo().getParameters().entrySet()) { + hcatProperties.put(param.getKey(), param.getValue()); + } + + + return new StorerInfo( + sd.getInputFormat(), sd.getOutputFormat(), sd.getSerdeInfo().getSerializationLib(), + properties.get(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE), + hcatProperties); + } + + static StructObjectInspector createStructObjectInspector(HCatSchema outputSchema) throws IOException { + + if (outputSchema == null) { + throw new IOException("Invalid output schema specified"); + } + + List fieldInspectors = new ArrayList(); + List fieldNames = new ArrayList(); + + for (HCatFieldSchema hcatFieldSchema : outputSchema.getFields()) { + TypeInfo type = TypeInfoUtils.getTypeInfoFromTypeString(hcatFieldSchema.getTypeString()); + + fieldNames.add(hcatFieldSchema.getName()); + fieldInspectors.add(getObjectInspector(type)); + } + + StructObjectInspector structInspector = ObjectInspectorFactory. + getStandardStructObjectInspector(fieldNames, fieldInspectors); + return structInspector; + } + + private static ObjectInspector getObjectInspector(TypeInfo type) throws IOException { + + switch (type.getCategory()) { + + case PRIMITIVE: + PrimitiveTypeInfo primitiveType = (PrimitiveTypeInfo) type; + return PrimitiveObjectInspectorFactory. 
+ getPrimitiveJavaObjectInspector(primitiveType.getPrimitiveCategory()); + + case MAP: + MapTypeInfo mapType = (MapTypeInfo) type; + MapObjectInspector mapInspector = ObjectInspectorFactory.getStandardMapObjectInspector( + getObjectInspector(mapType.getMapKeyTypeInfo()), getObjectInspector(mapType.getMapValueTypeInfo())); + return mapInspector; + + case LIST: + ListTypeInfo listType = (ListTypeInfo) type; + ListObjectInspector listInspector = ObjectInspectorFactory.getStandardListObjectInspector( + getObjectInspector(listType.getListElementTypeInfo())); + return listInspector; + + case STRUCT: + StructTypeInfo structType = (StructTypeInfo) type; + List fieldTypes = structType.getAllStructFieldTypeInfos(); + + List fieldInspectors = new ArrayList(); + for (TypeInfo fieldType : fieldTypes) { + fieldInspectors.add(getObjectInspector(fieldType)); + } + + StructObjectInspector structInspector = ObjectInspectorFactory.getStandardStructObjectInspector( + structType.getAllStructFieldNames(), fieldInspectors); + return structInspector; + + default: + throw new IOException("Unknown field schema type"); + } + } + + //TODO this has to find a better home, it's also hardcoded as default in hive would be nice + // if the default was decided by the serde + static void initializeOutputSerDe(SerDe serDe, Configuration conf, OutputJobInfo jobInfo) + throws SerDeException { + serDe.initialize(conf, getSerdeProperties(jobInfo.getTableInfo(), jobInfo.getOutputSchema())); + } + + static void initializeDeserializer(Deserializer deserializer, Configuration conf, + HCatTableInfo info, HCatSchema schema) throws SerDeException { + Properties props = getSerdeProperties(info, schema); + LOG.info("Initializing " + deserializer.getClass().getName() + " with properties " + props); + deserializer.initialize(conf, props); + } + + private static Properties getSerdeProperties(HCatTableInfo info, HCatSchema s) + throws SerDeException { + Properties props = new Properties(); + List fields = 
HCatUtil.getFieldSchemaList(s.getFields()); + props.setProperty(org.apache.hadoop.hive.serde.serdeConstants.LIST_COLUMNS, + MetaStoreUtils.getColumnNamesFromFieldSchema(fields)); + props.setProperty(org.apache.hadoop.hive.serde.serdeConstants.LIST_COLUMN_TYPES, + MetaStoreUtils.getColumnTypesFromFieldSchema(fields)); + + // setting these props to match LazySimpleSerde + props.setProperty(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_NULL_FORMAT, "\\N"); + props.setProperty(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_FORMAT, "1"); + + //add props from params set in table schema + props.putAll(info.getStorerInfo().getProperties()); + + return props; + } + + static Reporter createReporter(TaskAttemptContext context) { + return new ProgressReporter(context); + } + + /** + * Casts an InputSplit into a HCatSplit, providing a useful error message if the cast fails. + * @param split the InputSplit + * @return the HCatSplit + * @throws IOException + */ + public static HCatSplit castToHCatSplit(InputSplit split) throws IOException { + if (split instanceof HCatSplit) { + return (HCatSplit) split; + } else { + throw new IOException("Split must be " + HCatSplit.class.getName() + + " but found " + split.getClass().getName()); + } + } + + + static Map createPtnKeyValueMap(Table table, Partition ptn) + throws IOException { + List values = ptn.getValues(); + if (values.size() != table.getPartitionKeys().size()) { + throw new IOException( + "Partition values in partition inconsistent with table definition, table " + + table.getTableName() + " has " + + table.getPartitionKeys().size() + + " partition keys, partition has " + values.size() + + "partition values"); + } + + Map ptnKeyValues = new HashMap(); + + int i = 0; + for (FieldSchema schema : table.getPartitionKeys()) { + // CONCERN : the way this mapping goes, the order *needs* to be + // preserved for table.getPartitionKeys() and ptn.getValues() + ptnKeyValues.put(schema.getName().toLowerCase(), 
values.get(i)); + i++; + } + + return ptnKeyValues; + } +} diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/MultiOutputFormat.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/MultiOutputFormat.java new file mode 100644 index 0000000..a2c670f --- /dev/null +++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/MultiOutputFormat.java @@ -0,0 +1,623 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hive.hcatalog.mapreduce; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; + +import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.shims.HadoopShims; +import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.JobContext; +import org.apache.hadoop.mapreduce.JobStatus.State; +import org.apache.hadoop.mapreduce.OutputCommitter; +import org.apache.hadoop.mapreduce.OutputFormat; +import org.apache.hadoop.mapreduce.RecordWriter; +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.hadoop.mapreduce.TaskInputOutputContext; +import org.apache.hadoop.util.ReflectionUtils; +import org.apache.hive.hcatalog.common.HCatUtil; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * The MultiOutputFormat class simplifies writing output data to multiple + * outputs. + *

+ * Multiple output formats can be defined each with its own + * OutputFormat class, own key class and own value class. Any + * configuration on these output format classes can be done without interfering + * with other output format's configuration. + *

+ * Usage pattern for job submission: + * + *

+ *
+ * Job job = new Job();
+ *
+ * FileInputFormat.setInputPaths(job, inDir);
+ *
+ * job.setMapperClass(WordCountMap.class);
+ * job.setReducerClass(WordCountReduce.class);
+ * job.setInputFormatClass(TextInputFormat.class);
+ * job.setOutputFormatClass(MultiOutputFormat.class);
+ * // Need not define OutputKeyClass and OutputValueClass. They default to
+ * // Writable.class
+ * job.setMapOutputKeyClass(Text.class);
+ * job.setMapOutputValueClass(IntWritable.class);
+ *
+ *
+ * // Create a JobConfigurer that will configure the job with the multiple
+ * // output format information.
+ * JobConfigurer configurer = MultiOutputFormat.createConfigurer(job);
+ *
+ * // Defines additional single text based output 'text' for the job.
+ * // Any configuration for the defined OutputFormat should be done with
+ * // the Job obtained with the configurer.getJob(alias) method.
+ * configurer.addOutputFormat("text", TextOutputFormat.class,
+ *                 IntWritable.class, Text.class);
+ * FileOutputFormat.setOutputPath(configurer.getJob("text"), textOutDir);
+ *
+ * // Defines additional sequence-file based output 'sequence' for the job
+ * configurer.addOutputFormat("sequence", SequenceFileOutputFormat.class,
+ *                 Text.class, IntWritable.class);
+ * FileOutputFormat.setOutputPath(configurer.getJob("sequence"), seqOutDir);
+ * ...
+ * // configure method to be called on the JobConfigurer once all the
+ * // output formats have been defined and configured.
+ * configurer.configure();
+ *
+ * job.waitForCompletion(true);
+ * ...
+ * 
+ *

+ * Usage in Reducer: + * + *

+ * public class WordCountReduce extends
+ *         Reducer<Text, IntWritable, Writable, Writable> {
+ *
+ *     private IntWritable count = new IntWritable();
+ *
+ *     public void reduce(Text word, Iterable&lt;IntWritable&gt; values,
+ *             Context context)
+ *             throws IOException, InterruptedException {
+ *         int sum = 0;
+ *         for (IntWritable val : values) {
+ *             sum += val.get();
+ *         }
+ *         count.set(sum);
+ *         MultiOutputFormat.write("text", count, word, context);
+ *         MultiOutputFormat.write("sequence", word, count, context);
+ *     }
+ *
+ * }
+ *
+ * 
+ * + * Map only jobs: + *

+ * MultiOutputFormat.write("output", key, value, context); can be called similar + * to a reducer in map only jobs. + * + */ +public class MultiOutputFormat extends OutputFormat { + + private static final Logger LOGGER = LoggerFactory.getLogger(MultiOutputFormat.class.getName()); + private static final String MO_ALIASES = "mapreduce.multiout.aliases"; + private static final String MO_ALIAS = "mapreduce.multiout.alias"; + private static final String CONF_KEY_DELIM = "%%"; + private static final String CONF_VALUE_DELIM = ";;"; + private static final String COMMA_DELIM = ","; + private static final List configsToOverride = new ArrayList(); + private static final Map configsToMerge = new HashMap(); + + static { + configsToOverride.add("mapred.output.dir"); + configsToOverride.add(ShimLoader.getHadoopShims().getHCatShim().getPropertyName( + HadoopShims.HCatHadoopShims.PropertyName.CACHE_SYMLINK)); + configsToMerge.put(JobContext.JOB_NAMENODES, COMMA_DELIM); + configsToMerge.put("tmpfiles", COMMA_DELIM); + configsToMerge.put("tmpjars", COMMA_DELIM); + configsToMerge.put("tmparchives", COMMA_DELIM); + configsToMerge.put(ShimLoader.getHadoopShims().getHCatShim().getPropertyName( + HadoopShims.HCatHadoopShims.PropertyName.CACHE_ARCHIVES), COMMA_DELIM); + configsToMerge.put(ShimLoader.getHadoopShims().getHCatShim().getPropertyName( + HadoopShims.HCatHadoopShims.PropertyName.CACHE_FILES), COMMA_DELIM); + String fileSep; + if (HCatUtil.isHadoop23()) { + fileSep = ","; + } else { + fileSep = System.getProperty("path.separator"); + } + configsToMerge.put("mapred.job.classpath.archives", fileSep); + configsToMerge.put("mapred.job.classpath.files", fileSep); + } + + /** + * Get a JobConfigurer instance that will support configuration of the job + * for multiple output formats. 
+ * + * @param job the mapreduce job to be submitted + * @return JobConfigurer + */ + public static JobConfigurer createConfigurer(Job job) { + return JobConfigurer.create(job); + } + + /** + * Get the JobContext with the related OutputFormat configuration populated given the alias + * and the actual JobContext + * @param alias the name given to the OutputFormat configuration + * @param context the JobContext + * @return a copy of the JobContext with the alias configuration populated + */ + public static JobContext getJobContext(String alias, JobContext context) { + String aliasConf = context.getConfiguration().get(getAliasConfName(alias)); + JobContext aliasContext = ShimLoader.getHadoopShims().getHCatShim().createJobContext( + context.getConfiguration(), context.getJobID()); + addToConfig(aliasConf, aliasContext.getConfiguration()); + return aliasContext; + } + + /** + * Get the TaskAttemptContext with the related OutputFormat configuration populated given the alias + * and the actual TaskAttemptContext + * @param alias the name given to the OutputFormat configuration + * @param context the Mapper or Reducer Context + * @return a copy of the TaskAttemptContext with the alias configuration populated + */ + public static TaskAttemptContext getTaskAttemptContext(String alias, TaskAttemptContext context) { + String aliasConf = context.getConfiguration().get(getAliasConfName(alias)); + TaskAttemptContext aliasContext = ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext( + context.getConfiguration(), context.getTaskAttemptID()); + addToConfig(aliasConf, aliasContext.getConfiguration()); + return aliasContext; + } + + /** + * Write the output key and value using the OutputFormat defined by the + * alias. 
+ * + * @param alias the name given to the OutputFormat configuration + * @param key the output key to be written + * @param value the output value to be written + * @param context the Mapper or Reducer Context + * @throws IOException + * @throws InterruptedException + */ + public static void write(String alias, K key, V value, TaskInputOutputContext context) + throws IOException, InterruptedException { + KeyValue keyval = new KeyValue(key, value); + context.write(new Text(alias), keyval); + } + + @Override + public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException { + for (String alias : getOutputFormatAliases(context)) { + LOGGER.debug("Calling checkOutputSpecs for alias: " + alias); + JobContext aliasContext = getJobContext(alias, context); + OutputFormat outputFormat = getOutputFormatInstance(aliasContext); + outputFormat.checkOutputSpecs(aliasContext); + // Copy credentials and any new config added back to JobContext + context.getCredentials().addAll(aliasContext.getCredentials()); + setAliasConf(alias, context, aliasContext); + } + } + + @Override + public RecordWriter getRecordWriter(TaskAttemptContext context) + throws IOException, + InterruptedException { + return new MultiRecordWriter(context); + } + + @Override + public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException, + InterruptedException { + return new MultiOutputCommitter(context); + } + + private static OutputFormat getOutputFormatInstance(JobContext context) { + OutputFormat outputFormat; + try { + outputFormat = ReflectionUtils.newInstance(context.getOutputFormatClass(), + context.getConfiguration()); + } catch (ClassNotFoundException e) { + throw new IllegalStateException(e); + } + return outputFormat; + } + + private static String[] getOutputFormatAliases(JobContext context) { + return context.getConfiguration().getStrings(MO_ALIASES); + } + + /** + * Compare the aliasContext with userJob and add the differing configuration + * 
as mapreduce.multiout.alias.&lt;alias&gt;.conf to the userJob. + *

+ * Merge config like tmpjars, tmpfile, tmparchives, + * mapreduce.job.hdfs-servers that are directly handled by JobClient and add + * them to userJob. + *

+ * Add mapred.output.dir config to userJob. + * + * @param alias alias name associated with a OutputFormat + * @param userJob reference to Job that the user is going to submit + * @param aliasContext JobContext populated with OutputFormat related + * configuration. + */ + private static void setAliasConf(String alias, JobContext userJob, JobContext aliasContext) { + Configuration userConf = userJob.getConfiguration(); + StringBuilder builder = new StringBuilder(); + for (Entry conf : aliasContext.getConfiguration()) { + String key = conf.getKey(); + String value = conf.getValue(); + String jobValue = userConf.getRaw(key); + if (jobValue == null || !jobValue.equals(value)) { + if (configsToMerge.containsKey(key)) { + String mergedValue = getMergedConfValue(jobValue, value, configsToMerge.get(key)); + userConf.set(key, mergedValue); + } else { + if (configsToOverride.contains(key)) { + userConf.set(key, value); + } + builder.append(key).append(CONF_KEY_DELIM).append(value) + .append(CONF_VALUE_DELIM); + } + } + } + if (builder.length() > CONF_VALUE_DELIM.length()) { + builder.delete(builder.length() - CONF_VALUE_DELIM.length(), builder.length()); + userConf.set(getAliasConfName(alias), builder.toString()); + } + } + + private static String getMergedConfValue(String originalValues, String newValues, String separator) { + if (originalValues == null) { + return newValues; + } + Set mergedValues = new LinkedHashSet(); + mergedValues.addAll(Arrays.asList(StringUtils.split(originalValues, separator))); + mergedValues.addAll(Arrays.asList(StringUtils.split(newValues, separator))); + StringBuilder builder = new StringBuilder(originalValues.length() + newValues.length() + 2); + for (String value : mergedValues) { + builder.append(value).append(separator); + } + return builder.substring(0, builder.length() - separator.length()); + } + + private static String getAliasConfName(String alias) { + return MO_ALIAS + "." 
+ alias + ".conf"; + } + + private static void addToConfig(String aliasConf, Configuration conf) { + String[] config = aliasConf.split(CONF_KEY_DELIM + "|" + CONF_VALUE_DELIM, -1); /* limit -1 keeps a trailing empty value so keys and values stay paired */ + for (int i = 0; i < config.length; i += 2) { + conf.set(config[i], config[i + 1]); + } + } + + /** + * Class that supports configuration of the job for multiple output formats. + */ + public static class JobConfigurer { + + private final Job job; + private Map outputConfigs = new LinkedHashMap(); + + private JobConfigurer(Job job) { + this.job = job; + } + + private static JobConfigurer create(Job job) { + JobConfigurer configurer = new JobConfigurer(job); + return configurer; + } + + /** + * Add an OutputFormat configuration to the Job with an alias name. + * + * @param alias the name to be given to the OutputFormat configuration + * @param outputFormatClass OutputFormat class + * @param keyClass the key class for the output data + * @param valueClass the value class for the output data + * @throws IOException if the copy of the Job for this alias cannot be created + */ + public void addOutputFormat(String alias, + Class outputFormatClass, + Class keyClass, Class valueClass) throws IOException { + Job copy = new Job(this.job.getConfiguration()); + outputConfigs.put(alias, copy); + copy.setOutputFormatClass(outputFormatClass); + copy.setOutputKeyClass(keyClass); + copy.setOutputValueClass(valueClass); + } + + /** + * Get the Job configuration for an OutputFormat defined by the alias + * name. The job returned by this method should be passed to the + * OutputFormat for any configuration instead of the Job that will be + * submitted to the JobClient. + * + * @param alias the name used for the OutputFormat during + * addOutputFormat + * @return Job + */ + public Job getJob(String alias) { + Job copy = outputConfigs.get(alias); + if (copy == null) { + throw new IllegalArgumentException("OutputFormat with alias " + alias + + " has not been added"); + } + return copy; + } + + /** + * Configure the job with the multiple output formats added.
This method + * should be called after all the output formats have been added and + * configured and before the job submission. + */ + public void configure() { + StringBuilder aliases = new StringBuilder(); + Configuration jobConf = job.getConfiguration(); + for (Entry entry : outputConfigs.entrySet()) { + // Copy credentials + job.getCredentials().addAll(entry.getValue().getCredentials()); + String alias = entry.getKey(); + aliases.append(alias).append(COMMA_DELIM); + // Store the differing configuration for each alias in the job + // as a setting. + setAliasConf(alias, job, entry.getValue()); + } + aliases.delete(aliases.length() - COMMA_DELIM.length(), aliases.length()); + jobConf.set(MO_ALIASES, aliases.toString()); + } + + } + + private static class KeyValue implements Writable { + private final K key; + private final V value; + + public KeyValue(K key, V value) { + this.key = key; + this.value = value; + } + + public K getKey() { + return key; + } + + public V getValue() { + return value; + } + + @Override + public void write(DataOutput out) throws IOException { + // Ignore. Not required as this will be never + // serialized/deserialized. + } + + @Override + public void readFields(DataInput in) throws IOException { + // Ignore. Not required as this will be never + // serialized/deserialized. + } + } + + private static class MultiRecordWriter extends RecordWriter { + + private final Map baseRecordWriters; + + public MultiRecordWriter(TaskAttemptContext context) throws IOException, + InterruptedException { + baseRecordWriters = new LinkedHashMap(); + String[] aliases = getOutputFormatAliases(context); + for (String alias : aliases) { + LOGGER.info("Creating record writer for alias: " + alias); + TaskAttemptContext aliasContext = getTaskAttemptContext(alias, context); + Configuration aliasConf = aliasContext.getConfiguration(); + // Create output directory if not already created. 
+ String outDir = aliasConf.get("mapred.output.dir"); + if (outDir != null) { + Path outputDir = new Path(outDir); + FileSystem fs = outputDir.getFileSystem(aliasConf); + if (!fs.exists(outputDir)) { + fs.mkdirs(outputDir); + } + } + OutputFormat outputFormat = getOutputFormatInstance(aliasContext); + baseRecordWriters.put(alias, + new BaseRecordWriterContainer(outputFormat.getRecordWriter(aliasContext), + aliasContext)); + } + } + + @Override + public void write(Writable key, Writable value) throws IOException, InterruptedException { + Text _key = (Text) key; + KeyValue _value = (KeyValue) value; + String alias = _key.toString(); /* Text holds UTF-8; toString() decodes it independent of the platform charset */ + BaseRecordWriterContainer baseRWContainer = baseRecordWriters.get(alias); + if (baseRWContainer == null) { + throw new IllegalArgumentException("OutputFormat with alias " + alias + + " has not been added"); + } + baseRWContainer.getRecordWriter().write(_value.getKey(), _value.getValue()); + } + + @Override + public void close(TaskAttemptContext context) throws IOException, InterruptedException { + for (Entry entry : baseRecordWriters.entrySet()) { + BaseRecordWriterContainer baseRWContainer = entry.getValue(); + LOGGER.info("Closing record writer for alias: " + entry.getKey()); + baseRWContainer.getRecordWriter().close(baseRWContainer.getContext()); + } + } + + } + + private static class BaseRecordWriterContainer { + + private final RecordWriter recordWriter; + private final TaskAttemptContext context; + + public BaseRecordWriterContainer(RecordWriter recordWriter, TaskAttemptContext context) { + this.recordWriter = recordWriter; + this.context = context; + } + + public RecordWriter getRecordWriter() { + return recordWriter; + } + + public TaskAttemptContext getContext() { + return context; + } + } + + public class MultiOutputCommitter extends OutputCommitter { + + private final Map outputCommitters; + + public MultiOutputCommitter(TaskAttemptContext context) throws IOException, + InterruptedException {
+ outputCommitters = new LinkedHashMap(); + String[] aliases = getOutputFormatAliases(context); + for (String alias : aliases) { + LOGGER.info("Creating output committer for alias: " + alias); + TaskAttemptContext aliasContext = getTaskAttemptContext(alias, context); + OutputCommitter baseCommitter = getOutputFormatInstance(aliasContext) + .getOutputCommitter(aliasContext); + outputCommitters.put(alias, + new BaseOutputCommitterContainer(baseCommitter, aliasContext)); + } + } + + @Override + public void setupJob(JobContext jobContext) throws IOException { + for (String alias : outputCommitters.keySet()) { + LOGGER.info("Calling setupJob for alias: " + alias); + BaseOutputCommitterContainer outputContainer = outputCommitters.get(alias); + outputContainer.getBaseCommitter().setupJob(outputContainer.getContext()); + } + } + + @Override + public void setupTask(TaskAttemptContext taskContext) throws IOException { + for (String alias : outputCommitters.keySet()) { + LOGGER.info("Calling setupTask for alias: " + alias); + BaseOutputCommitterContainer outputContainer = outputCommitters.get(alias); + outputContainer.getBaseCommitter().setupTask(outputContainer.getContext()); + } + } + + @Override + public boolean needsTaskCommit(TaskAttemptContext taskContext) throws IOException { + boolean needTaskCommit = false; + for (String alias : outputCommitters.keySet()) { + BaseOutputCommitterContainer outputContainer = outputCommitters.get(alias); + needTaskCommit = needTaskCommit + || outputContainer.getBaseCommitter().needsTaskCommit( + outputContainer.getContext()); + } + return needTaskCommit; + } + + @Override + public void commitTask(TaskAttemptContext taskContext) throws IOException { + for (String alias : outputCommitters.keySet()) { + BaseOutputCommitterContainer outputContainer = outputCommitters.get(alias); + OutputCommitter baseCommitter = outputContainer.getBaseCommitter(); + TaskAttemptContext committerContext = outputContainer.getContext(); + if 
(baseCommitter.needsTaskCommit(committerContext)) { + LOGGER.info("Calling commitTask for alias: " + alias); + baseCommitter.commitTask(committerContext); + } + } + } + + @Override + public void abortTask(TaskAttemptContext taskContext) throws IOException { + for (String alias : outputCommitters.keySet()) { + LOGGER.info("Calling abortTask for alias: " + alias); + BaseOutputCommitterContainer outputContainer = outputCommitters.get(alias); + outputContainer.getBaseCommitter().abortTask(outputContainer.getContext()); + } + } + + @Override + public void commitJob(JobContext jobContext) throws IOException { + for (String alias : outputCommitters.keySet()) { + LOGGER.info("Calling commitJob for alias: " + alias); + BaseOutputCommitterContainer outputContainer = outputCommitters.get(alias); + outputContainer.getBaseCommitter().commitJob(outputContainer.getContext()); + } + } + + @Override + public void abortJob(JobContext jobContext, State state) throws IOException { + for (String alias : outputCommitters.keySet()) { + LOGGER.info("Calling abortJob for alias: " + alias); + BaseOutputCommitterContainer outputContainer = outputCommitters.get(alias); + outputContainer.getBaseCommitter().abortJob(outputContainer.getContext(), state); + } + } + } + + private static class BaseOutputCommitterContainer { + + private final OutputCommitter outputCommitter; + private final TaskAttemptContext context; + + public BaseOutputCommitterContainer(OutputCommitter outputCommitter, + TaskAttemptContext context) { + this.outputCommitter = outputCommitter; + this.context = context; + } + + public OutputCommitter getBaseCommitter() { + return outputCommitter; + } + + public TaskAttemptContext getContext() { + return context; + } + } + +} diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/OutputCommitterContainer.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/OutputCommitterContainer.java new file mode 100644 index 0000000..3c68b0c --- /dev/null +++ 
hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/OutputCommitterContainer.java @@ -0,0 +1,47 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.mapreduce; + +import org.apache.hadoop.mapreduce.JobContext; +import org.apache.hadoop.mapreduce.OutputCommitter; + +/** + * This class will contain an implementation of an OutputCommitter. + * See {@link OutputFormatContainer} for more information about containers. 
+ */
+abstract class OutputCommitterContainer extends OutputCommitter {
+  /** The wrapped (old-API, org.apache.hadoop.mapred) committer. */
+  private final org.apache.hadoop.mapred.OutputCommitter committer;
+
+  /**
+   * @param context current JobContext; accepted but not used by this constructor
+   * @param committer OutputCommitter that this instance will contain
+   */
+  public OutputCommitterContainer(JobContext context, org.apache.hadoop.mapred.OutputCommitter committer) {
+    this.committer = committer;
+  }
+
+  /**
+   * @return underlying OutputCommitter, exactly as passed to the constructor
+   */
+  public OutputCommitter getBaseOutputCommitter() {
+    return committer;
+  }
+
+}
diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/OutputFormatContainer.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/OutputFormatContainer.java
new file mode 100644
index 0000000..07b63dd
--- /dev/null
+++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/OutputFormatContainer.java
@@ -0,0 +1,53 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */ + +package org.apache.hive.hcatalog.mapreduce; + +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.io.WritableComparable; +import org.apache.hadoop.mapreduce.OutputFormat; +import org.apache.hive.hcatalog.data.HCatRecord; + +/** + * This container class is used to wrap OutputFormat implementations and augment them with + * behavior necessary to work with HCatalog (ie metastore updates, hcatalog delegation tokens, etc). + * Containers are also used to provide storage specific implementations of some HCatalog features (ie dynamic partitioning). + * Hence users wishing to create storage specific implementations of HCatalog features should implement this class and override + * HCatStorageHandler.getOutputFormatContainer(OutputFormat outputFormat) to return the implementation. + * By default DefaultOutputFormatContainer is used, which only implements the bare minimum features HCatalog features + * such as partitioning isn't supported. + */ +abstract class OutputFormatContainer extends OutputFormat, HCatRecord> { + private org.apache.hadoop.mapred.OutputFormat, ? super Writable> of; + + /** + * @param of OutputFormat this instance will contain + */ + public OutputFormatContainer(org.apache.hadoop.mapred.OutputFormat, ? super Writable> of) { + this.of = of; + } + + /** + * @return underlying OutputFormat + */ + public org.apache.hadoop.mapred.OutputFormat getBaseOutputFormat() { + return of; + } + +} diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/OutputJobInfo.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/OutputJobInfo.java new file mode 100644 index 0000000..36c166a --- /dev/null +++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/OutputJobInfo.java @@ -0,0 +1,270 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.mapreduce; + +import java.io.Serializable; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; +import java.util.Map; +import java.util.Properties; + +import org.apache.hadoop.hive.metastore.MetaStoreUtils; +import org.apache.hive.hcatalog.data.schema.HCatSchema; + +/** The class used to serialize and store the output related information */ +public class OutputJobInfo implements Serializable { + + /** The db and table names. */ + private final String databaseName; + private final String tableName; + + /** The serialization version. */ + private static final long serialVersionUID = 1L; + + /** The table info provided by user. */ + private HCatTableInfo tableInfo; + + /** The output schema. This is given to us by user. This wont contain any + * partition columns ,even if user has specified them. 
+ * */ + private HCatSchema outputSchema; + + /** The location of the partition being written */ + private String location; + + /** The partition values to publish to, if used for output*/ + private Map partitionValues; + + private List posOfPartCols; + private List posOfDynPartCols; + + private Properties properties; + + private int maxDynamicPartitions; + + /** List of keys for which values were not specified at write setup time, to be infered at write time */ + private List dynamicPartitioningKeys; + + private boolean harRequested; + + /** + * Initializes a new OutputJobInfo instance + * for writing data from a table. + * @param databaseName the db name + * @param tableName the table name + * @param partitionValues The partition values to publish to, can be null or empty Map to + * work with hadoop security, the kerberos principal name of the server - else null + * The principal name should be of the form: + * /_HOST@ like "hcat/_HOST@myrealm.com" + * The special string _HOST will be replaced automatically with the correct host name + * indicate write to a unpartitioned table. For partitioned tables, this map should + * contain keys for all partition columns with corresponding values. + */ + public static OutputJobInfo create(String databaseName, + String tableName, + Map partitionValues) { + return new OutputJobInfo(databaseName, + tableName, + partitionValues); + } + + private OutputJobInfo(String databaseName, + String tableName, + Map partitionValues) { + this.databaseName = (databaseName == null) ? 
MetaStoreUtils.DEFAULT_DATABASE_NAME : databaseName; + this.tableName = tableName; + this.partitionValues = partitionValues; + this.properties = new Properties(); + } + + /** + * @return the posOfPartCols + */ + protected List getPosOfPartCols() { + return posOfPartCols; + } + + /** + * @return the posOfDynPartCols + */ + protected List getPosOfDynPartCols() { + return posOfDynPartCols; + } + + /** + * @param posOfPartCols the posOfPartCols to set + */ + protected void setPosOfPartCols(List posOfPartCols) { + // sorting the list in the descending order so that deletes happen back-to-front + Collections.sort(posOfPartCols, new Comparator() { + @Override + public int compare(Integer earlier, Integer later) { + return (earlier > later) ? -1 : ((earlier == later) ? 0 : 1); + } + }); + this.posOfPartCols = posOfPartCols; + } + + /** + * @param posOfDynPartCols the posOfDynPartCols to set + */ + protected void setPosOfDynPartCols(List posOfDynPartCols) { + // Important - no sorting here! We retain order, it's used to match with values at runtime + this.posOfDynPartCols = posOfDynPartCols; + } + + /** + * @return the tableInfo + */ + public HCatTableInfo getTableInfo() { + return tableInfo; + } + + /** + * @return the outputSchema + */ + public HCatSchema getOutputSchema() { + return outputSchema; + } + + /** + * @param schema the outputSchema to set + */ + public void setOutputSchema(HCatSchema schema) { + this.outputSchema = schema; + } + + /** + * @return the location + */ + public String getLocation() { + return location; + } + + /** + * @param location location to write to + */ + public void setLocation(String location) { + this.location = location; + } + + /** + * Sets the value of partitionValues + * @param partitionValues the partition values to set + */ + void setPartitionValues(Map partitionValues) { + this.partitionValues = partitionValues; + } + + /** + * Gets the value of partitionValues + * @return the partitionValues + */ + public Map getPartitionValues() { 
+    return partitionValues;
+  }
+
+  /**
+   * Sets the tableInfo instance.
+   * This should be the same instance
+   * determined by this object's DatabaseName and TableName
+   * @param tableInfo the table info to associate with this job info
+   */
+  void setTableInfo(HCatTableInfo tableInfo) {
+    this.tableInfo = tableInfo;
+  }
+
+  /**
+   * @return database name of table to write to
+   */
+  public String getDatabaseName() {
+    return databaseName;
+  }
+
+  /**
+   * @return name of table to write to
+   */
+  public String getTableName() {
+    return tableName;
+  }
+
+  /**
+   * Set/Get Property information to be passed down to *StorageHandler implementation
+   * put implementation specific storage handler configurations here.
+   * The internal Properties object itself is returned, so changes made through it
+   * are visible to this instance.
+   * @return the implementation specific job properties
+   */
+  public Properties getProperties() {
+    return properties;
+  }
+
+  /**
+   * Set maximum number of allowable dynamic partitions
+   * @param maxDynamicPartitions the maximum number of dynamic partitions; stored without validation
+   */
+  public void setMaximumDynamicPartitions(int maxDynamicPartitions) {
+    this.maxDynamicPartitions = maxDynamicPartitions;
+  }
+
+  /**
+   * Returns maximum number of allowable dynamic partitions
+   * @return maximum number of allowable dynamic partitions
+   */
+  public int getMaxDynamicPartitions() {
+    return this.maxDynamicPartitions;
+  }
+
+  /**
+   * Sets whether or not hadoop archiving has been requested for this job
+   * @param harRequested true if hadoop archiving has been requested
+   */
+  public void setHarRequested(boolean harRequested) {
+    this.harRequested = harRequested;
+  }
+
+  /**
+   * Returns whether or not hadoop archiving has been requested for this job
+   * @return whether or not hadoop archiving has been requested for this job
+   */
+  public boolean getHarRequested() {
+    return this.harRequested;
+  }
+
+  /**
+   * Returns whether or not Dynamic Partitioning is used, i.e. whether the list of
+   * dynamic partitioning keys is both non-null and non-empty.
+   * @return whether or not dynamic partitioning is currently enabled and used
+   */
+  public boolean isDynamicPartitioningUsed() {
+    return !((dynamicPartitioningKeys == null) || (dynamicPartitioningKeys.isEmpty()));
+  }
+
+  /**
+   * Sets the list of dynamic partitioning
keys used for outputting without specifying all the keys + * @param dynamicPartitioningKeys + */ + public void setDynamicPartitioningKeys(List dynamicPartitioningKeys) { + this.dynamicPartitioningKeys = dynamicPartitioningKeys; + } + + public List getDynamicPartitioningKeys() { + return this.dynamicPartitioningKeys; + } + +} diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/PartInfo.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/PartInfo.java new file mode 100644 index 0000000..57b84b2 --- /dev/null +++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/PartInfo.java @@ -0,0 +1,164 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.mapreduce; + +import java.io.Serializable; +import java.util.Map; +import java.util.Properties; + +import org.apache.hive.hcatalog.data.schema.HCatSchema; + +/** The Class used to serialize the partition information read from the metadata server that maps to a partition. */ +public class PartInfo implements Serializable { + + /** The serialization version */ + private static final long serialVersionUID = 1L; + + /** The partition schema. 
*/ + private final HCatSchema partitionSchema; + + /** The information about which input storage handler to use */ + private final String storageHandlerClassName; + private final String inputFormatClassName; + private final String outputFormatClassName; + private final String serdeClassName; + + /** HCat-specific properties set at the partition */ + private final Properties hcatProperties; + + /** The data location. */ + private final String location; + + /** The map of partition key names and their values. */ + private Map partitionValues; + + /** Job properties associated with this parition */ + Map jobProperties; + + /** the table info associated with this partition */ + HCatTableInfo tableInfo; + + /** + * Instantiates a new hcat partition info. + * @param partitionSchema the partition schema + * @param storageHandler the storage handler + * @param location the location + * @param hcatProperties hcat-specific properties at the partition + * @param jobProperties the job properties + * @param tableInfo the table information + */ + public PartInfo(HCatSchema partitionSchema, HCatStorageHandler storageHandler, + String location, Properties hcatProperties, + Map jobProperties, HCatTableInfo tableInfo) { + this.partitionSchema = partitionSchema; + this.location = location; + this.hcatProperties = hcatProperties; + this.jobProperties = jobProperties; + this.tableInfo = tableInfo; + + this.storageHandlerClassName = storageHandler.getClass().getName(); + this.inputFormatClassName = storageHandler.getInputFormatClass().getName(); + this.serdeClassName = storageHandler.getSerDeClass().getName(); + this.outputFormatClassName = storageHandler.getOutputFormatClass().getName(); + } + + /** + * Gets the value of partitionSchema. 
+ * @return the partitionSchema + */ + public HCatSchema getPartitionSchema() { + return partitionSchema; + } + + /** + * @return the storage handler class name + */ + public String getStorageHandlerClassName() { + return storageHandlerClassName; + } + + /** + * @return the inputFormatClassName + */ + public String getInputFormatClassName() { + return inputFormatClassName; + } + + /** + * @return the outputFormatClassName + */ + public String getOutputFormatClassName() { + return outputFormatClassName; + } + + /** + * @return the serdeClassName + */ + public String getSerdeClassName() { + return serdeClassName; + } + + /** + * Gets the input storage handler properties. + * @return HCat-specific properties set at the partition + */ + public Properties getInputStorageHandlerProperties() { + return hcatProperties; + } + + /** + * Gets the value of location. + * @return the location + */ + public String getLocation() { + return location; + } + + /** + * Sets the partition values. + * @param partitionValues the new partition values + */ + public void setPartitionValues(Map partitionValues) { + this.partitionValues = partitionValues; + } + + /** + * Gets the partition values. + * @return the partition values + */ + public Map getPartitionValues() { + return partitionValues; + } + + /** + * Gets the job properties. + * @return a map of the job properties + */ + public Map getJobProperties() { + return jobProperties; + } + + /** + * Gets the HCatalog table information. 
+ * @return the table information + */ + public HCatTableInfo getTableInfo() { + return tableInfo; + } +} diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/ProgressReporter.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/ProgressReporter.java new file mode 100644 index 0000000..99a7ef3 --- /dev/null +++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/ProgressReporter.java @@ -0,0 +1,92 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */
+
+package org.apache.hive.hcatalog.mapreduce;
+
+import org.apache.hadoop.mapred.Counters;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapreduce.StatusReporter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.TaskInputOutputContext;
+
+/**
+ * Wraps a task context and exposes it both as a StatusReporter and as a mapred Reporter.
+ * Status and counter calls are forwarded only when the wrapped context is a
+ * TaskInputOutputContext; otherwise they do nothing or return null.
+ */
+class ProgressReporter extends StatusReporter implements Reporter {
+
+  // Exactly one of these two is set by the constructor (for a non-null argument).
+  private TaskInputOutputContext context = null;
+  private TaskAttemptContext taskAttemptContext = null;
+
+  /**
+   * @param context the task context to report through
+   */
+  public ProgressReporter(TaskAttemptContext context) {
+    if (context instanceof TaskInputOutputContext) {
+      this.context = (TaskInputOutputContext) context;
+    } else {
+      taskAttemptContext = context;
+    }
+  }
+
+  /** Forwards the status to the wrapped TaskInputOutputContext, if there is one. */
+  @Override
+  public void setStatus(String status) {
+    if (context != null) {
+      context.setStatus(status);
+    }
+  }
+
+  /**
+   * @return the counter from the wrapped TaskInputOutputContext, or null if there is none
+   */
+  @Override
+  public Counters.Counter getCounter(Enum name) {
+    // NOTE(review): the cast assumes the context hands out mapred Counters.Counter
+    // instances - confirm for the Hadoop versions this builds against.
+    return (context != null) ? (Counters.Counter) context.getCounter(name) : null;
+  }
+
+  /**
+   * @return the counter from the wrapped TaskInputOutputContext, or null if there is none
+   */
+  @Override
+  public Counters.Counter getCounter(String group, String name) {
+    return (context != null) ? (Counters.Counter) context.getCounter(group, name) : null;
+  }
+
+  /** Increments the counter on the wrapped TaskInputOutputContext, if there is one. */
+  @Override
+  public void incrCounter(Enum key, long amount) {
+    if (context != null) {
+      context.getCounter(key).increment(amount);
+    }
+  }
+
+  /** Increments the counter on the wrapped TaskInputOutputContext, if there is one. */
+  @Override
+  public void incrCounter(String group, String counter, long amount) {
+    if (context != null) {
+      context.getCounter(group, counter).increment(amount);
+    }
+  }
+
+  /** @return always null; no input split is tracked by this reporter */
+  @Override
+  public InputSplit getInputSplit() throws UnsupportedOperationException {
+    return null;
+  }
+
+  public float getProgress() {
+    /* Required to build against 0.23 Reporter and StatusReporter. */
+    /* TODO: determine the progress.
*/ + return 0.0f; + } + + @Override + public void progress() { + if (context != null) { + context.progress(); + } else { + taskAttemptContext.progress(); + } + } +} diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/RecordWriterContainer.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/RecordWriterContainer.java new file mode 100644 index 0000000..fcae25b --- /dev/null +++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/RecordWriterContainer.java @@ -0,0 +1,53 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.mapreduce; + + +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.io.WritableComparable; +import org.apache.hadoop.mapreduce.RecordWriter; +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.hive.hcatalog.data.HCatRecord; + +/** + * This class will contain an implementation of an RecordWriter. + * See {@link OutputFormatContainer} for more information about containers. + */ +abstract class RecordWriterContainer extends RecordWriter, HCatRecord> { + + private final org.apache.hadoop.mapred.RecordWriter, ? 
super Writable> baseRecordWriter; + + /** + * @param context current JobContext + * @param baseRecordWriter RecordWriter that this instance will contain + */ + public RecordWriterContainer(TaskAttemptContext context, + org.apache.hadoop.mapred.RecordWriter, ? super Writable> baseRecordWriter) { + this.baseRecordWriter = baseRecordWriter; + } + + /** + * @return underlying RecordWriter + */ + public org.apache.hadoop.mapred.RecordWriter getBaseRecordWriter() { + return baseRecordWriter; + } + +} diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/Security.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/Security.java new file mode 100644 index 0000000..d4dc276 --- /dev/null +++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/Security.java @@ -0,0 +1,191 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hive.hcatalog.mapreduce; + +import java.io.IOException; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.util.Map; +import java.util.Map.Entry; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.thrift.DelegationTokenSelector; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.JobContext; +import org.apache.hadoop.security.Credentials; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.security.token.Token; +import org.apache.hadoop.security.token.TokenIdentifier; +import org.apache.hadoop.security.token.TokenSelector; +import org.apache.hive.hcatalog.common.HCatConstants; +import org.apache.hive.hcatalog.common.HCatUtil; +import org.apache.thrift.TException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +final class Security { + + private static final Logger LOG = LoggerFactory.getLogger(HCatOutputFormat.class); + + // making sure this is not initialized unless needed + private static final class LazyHolder { + public static final Security INSTANCE = new Security(); + } + + public static Security getInstance() { + return LazyHolder.INSTANCE; + } + + boolean isSecurityEnabled() { + try { + Method m = UserGroupInformation.class.getMethod("isSecurityEnabled"); + return (Boolean) m.invoke(null, (Object[]) null); + } catch (NoSuchMethodException e) { + LOG.info("Security is not supported by this version of hadoop.", e); + } catch (InvocationTargetException e) { + String msg = "Failed to call isSecurityEnabled()"; + LOG.info(msg, e); + throw new IllegalStateException(msg, e); + } catch (IllegalAccessException e) { + String msg = "Failed to call isSecurityEnabled()"; + LOG.info(msg, e); + 
throw new IllegalStateException(msg, e); + } + return false; + } + + // a signature string to associate with a HCatTableInfo - essentially + // a concatenation of dbname, tablename and partition keyvalues. + String getTokenSignature(OutputJobInfo outputJobInfo) { + StringBuilder result = new StringBuilder(""); + String dbName = outputJobInfo.getDatabaseName(); + if (dbName != null) { + result.append(dbName); + } + String tableName = outputJobInfo.getTableName(); + if (tableName != null) { + result.append("." + tableName); + } + Map partValues = outputJobInfo.getPartitionValues(); + if (partValues != null) { + for (Entry entry : partValues.entrySet()) { + result.append("/"); + result.append(entry.getKey()); + result.append("="); + result.append(entry.getValue()); + } + + } + return result.toString(); + } + + void handleSecurity( + Credentials credentials, + OutputJobInfo outputJobInfo, + HiveMetaStoreClient client, + Configuration conf, + boolean harRequested) + throws IOException, MetaException, TException, Exception { + if (UserGroupInformation.isSecurityEnabled()) { + UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); + // check if oozie has set up a hcat deleg. token - if so use it + TokenSelector hiveTokenSelector = new DelegationTokenSelector(); + //Oozie does not change the service field of the token + //hence by default token generation will have a value of "new Text("")" + //HiveClient will look for a use TokenSelector.selectToken() with service + //set to empty "Text" if hive.metastore.token.signature property is set to null + Token hiveToken = hiveTokenSelector.selectToken( + new Text(), ugi.getTokens()); + if (hiveToken == null) { + // we did not get token set up by oozie, let's get them ourselves here. 
+ // we essentially get a token per unique Output HCatTableInfo - this is + // done because through Pig, setOutput() method is called multiple times + // We want to only get the token once per unique output HCatTableInfo - + // we cannot just get one token since in multi-query case (> 1 store in 1 job) + // or the case when a single pig script results in > 1 jobs, the single + // token will get cancelled by the output committer and the subsequent + // stores will fail - by tying the token with the concatenation of + // dbname, tablename and partition keyvalues of the output + // TableInfo, we can have as many tokens as there are stores and the TokenSelector + // will correctly pick the right tokens which the committer will use and + // cancel. + String tokenSignature = getTokenSignature(outputJobInfo); + // get delegation tokens from hcat server and store them into the "job" + // These will be used in to publish partitions to + // hcat normally in OutputCommitter.commitJob() + // when the JobTracker in Hadoop MapReduce starts supporting renewal of + // arbitrary tokens, the renewer should be the principal of the JobTracker + hiveToken = HCatUtil.extractThriftToken(client.getDelegationToken(ugi.getUserName()), tokenSignature); + + if (harRequested) { + TokenSelector jtTokenSelector = + new org.apache.hadoop.mapreduce.security.token.delegation.DelegationTokenSelector(); + Token jtToken = jtTokenSelector.selectToken(org.apache.hadoop.security.SecurityUtil.buildTokenService( + ShimLoader.getHadoopShims().getHCatShim().getResourceManagerAddress(conf)), ugi.getTokens()); + if (jtToken == null) { + //we don't need to cancel this token as the TokenRenewer for JT tokens + //takes care of cancelling them + credentials.addToken( + new Text("hcat jt token"), + HCatUtil.getJobTrackerDelegationToken(conf, ugi.getUserName()) + ); + } + } + + credentials.addToken(new Text(ugi.getUserName() + "_" + tokenSignature), hiveToken); + // this will be used by the outputcommitter to pass 
on to the metastore client + // which in turn will pass on to the TokenSelector so that it can select + // the right token. + conf.set(HCatConstants.HCAT_KEY_TOKEN_SIGNATURE, tokenSignature); + } + } + } + + void handleSecurity( + Job job, + OutputJobInfo outputJobInfo, + HiveMetaStoreClient client, + Configuration conf, + boolean harRequested) + throws IOException, MetaException, TException, Exception { + handleSecurity(job.getCredentials(), outputJobInfo, client, conf, harRequested); + } + + // we should cancel hcat token if it was acquired by hcat + // and not if it was supplied (ie Oozie). In the latter + // case the HCAT_KEY_TOKEN_SIGNATURE property in the conf will not be set + void cancelToken(HiveMetaStoreClient client, JobContext context) throws IOException, MetaException { + String tokenStrForm = client.getTokenStrForm(); + if (tokenStrForm != null && context.getConfiguration().get(HCatConstants.HCAT_KEY_TOKEN_SIGNATURE) != null) { + try { + client.cancelDelegationToken(tokenStrForm); + } catch (TException e) { + String msg = "Failed to cancel delegation token"; + LOG.error(msg, e); + throw new IOException(msg, e); + } + } + } + +} diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/StorerInfo.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/StorerInfo.java new file mode 100644 index 0000000..47f8439 --- /dev/null +++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/StorerInfo.java @@ -0,0 +1,109 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hive.hcatalog.mapreduce;
+
+import java.io.Serializable;
+import java.util.Properties;
+
+/**
+ * Information about the storer to use for writing the data.
+ * All classes are held as class-name strings. Only the input format class
+ * and the properties can be replaced after construction.
+ */
+public class StorerInfo implements Serializable {
+
+  /** The serialization version */
+  private static final long serialVersionUID = 1L;
+
+  /** The properties for the storage handler */
+  private Properties properties;
+
+  /** The output format class name */
+  private String ofClass;
+
+  /** The input format class name */
+  private String ifClass;
+
+  /** The SerDe class name */
+  private String serdeClass;
+
+  /** The storage handler class name */
+  private String storageHandlerClass;
+
+  /**
+   * Initialize the storer information.
+   * @param ifClass the input format class
+   * @param ofClass the output format class
+   * @param serdeClass the SerDe class
+   * @param storageHandlerClass the storage handler class
+   * @param properties the properties for the storage handler
+   */
+  public StorerInfo(String ifClass, String ofClass, String serdeClass, String storageHandlerClass, Properties properties) {
+    super();
+    this.ifClass = ifClass;
+    this.ofClass = ofClass;
+    this.serdeClass = serdeClass;
+    this.storageHandlerClass = storageHandlerClass;
+    this.properties = properties;
+  }
+
+  /**
+   * @return the input format class
+   */
+  public String getIfClass() {
+    return ifClass;
+  }
+
+  /**
+   * @param ifClass the input format class
+   */
+  public void setIfClass(String ifClass) {
+    this.ifClass = ifClass;
+  }
+
+  /**
+   * @return the output format class
+   */
+  public String getOfClass() {
+    return ofClass;
+  }
+
+  /**
+   * @return the serdeClass
+   */
+  public String getSerdeClass() {
+    return serdeClass;
+  }
+
+  /**
+   * @return the storageHandlerClass
+   */
+  public String getStorageHandlerClass() {
+    return storageHandlerClass;
+  }
+
+  /**
+   * @return the storer properties (the internal instance, not a copy)
+   */
+  public Properties getProperties() {
+    return properties;
+  }
+
+  /**
+   * @param properties the storer properties to set
+   */
+  public void setProperties(Properties properties) {
+    this.properties = properties;
+  }
+
+
+}
diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/oozie/JavaAction.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/oozie/JavaAction.java
new file mode 100644
index 0000000..67517a1
--- /dev/null
+++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/oozie/JavaAction.java
@@ -0,0 +1,41 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.
The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hive.hcatalog.oozie;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.cli.CliDriver;
+import org.apache.hadoop.hive.cli.CliSessionState;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
+import org.apache.hadoop.hive.ql.session.SessionState;
+import org.apache.hive.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzer;
+
+/**
+ * Command-line entry point that runs one command line through the Hive
+ * CliDriver with the HCatalog semantic analyzer hook installed.
+ */
+public class JavaAction {
+
+    /**
+     * Builds the configuration, starts a CLI session and processes
+     * {@code args[0]}.
+     *
+     * @param args {@code args[0]} is the line handed to
+     *             {@code CliDriver.processLine}; it is read unconditionally,
+     *             so at least one argument must be supplied
+     * @throws Exception anything raised while configuring or running the line
+     */
+    public static void main(String[] args) throws Exception {
+
+        HiveConf conf = new HiveConf();
+        // The resource location comes from the "oozie.action.conf.xml" system property.
+        conf.addResource(new Path("file:///", System.getProperty("oozie.action.conf.xml")));
+        conf.setVar(ConfVars.SEMANTIC_ANALYZER_HOOK, HCatSemanticAnalyzer.class.getName());
+        conf.setBoolVar(ConfVars.METASTORE_USE_THRIFT_SASL, true);
+        SessionState.start(new CliSessionState(conf));
+        // The value returned by processLine is not inspected here.
+        new CliDriver().processLine(args[0]);
+    }
+
+}
diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/rcfile/RCFileMapReduceInputFormat.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/rcfile/RCFileMapReduceInputFormat.java
new file mode 100644
index 0000000..11db1a6
--- /dev/null
+++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/rcfile/RCFileMapReduceInputFormat.java
@@ -0,0 +1,50 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.
See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hive.hcatalog.rcfile;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+
+/**
+ * A FileInputFormat whose record reader is RCFileMapReduceRecordReader.
+ */
+public class RCFileMapReduceInputFormat
+    extends FileInputFormat {
+
+    /**
+     * Publishes the split description as the task status and returns a new,
+     * not yet initialized RCFileMapReduceRecordReader.
+     */
+    @Override
+    public RecordReader createRecordReader(InputSplit split,
+                                           TaskAttemptContext context) throws IOException, InterruptedException {
+
+        context.setStatus(split.toString());
+        return new RCFileMapReduceRecordReader();
+    }
+
+    /**
+     * Sets "mapred.min.split.size" on the job configuration to
+     * SequenceFile.SYNC_INTERVAL and then delegates split computation to the
+     * superclass. Note that this mutates the configuration of the passed job.
+     */
+    @Override
+    public List getSplits(JobContext job) throws IOException {
+
+        job.getConfiguration().setLong("mapred.min.split.size", SequenceFile.SYNC_INTERVAL);
+        return super.getSplits(job);
+    }
+}
diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/rcfile/RCFileMapReduceOutputFormat.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/rcfile/RCFileMapReduceOutputFormat.java
new file mode 100644
index 0000000..de5dbe6
--- /dev/null
+++
hcatalog/core/src/main/java/org/apache/hive/hcatalog/rcfile/RCFileMapReduceOutputFormat.java
@@ -0,0 +1,105 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hive.hcatalog.rcfile;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.io.RCFile;
+import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.io.compress.CompressionCodec;
+import org.apache.hadoop.io.compress.DefaultCodec;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.util.ReflectionUtils;
+
+/**
+ * The RC file output format using new Hadoop mapreduce APIs.
+ *
+ * Keys handed to the record writer are ignored; only the
+ * BytesRefArrayWritable values are appended to the RCFile.
+ */
+public class RCFileMapReduceOutputFormat extends
+    FileOutputFormat<WritableComparable<?>, BytesRefArrayWritable> {
+
+    /**
+     * Set number of columns into the given configuration.
+     * @param conf
+     *          configuration instance which need to set the column number
+     * @param columnNum
+     *          column number for RCFile's Writer; checked to be positive
+     *          only when JVM assertions are enabled
+     *
+     */
+    public static void setColumnNumber(Configuration conf, int columnNum) {
+        assert columnNum > 0;
+        conf.setInt(RCFile.COLUMN_NUMBER_CONF_STR, columnNum);
+    }
+
+    /**
+     * Creates the writer for one task attempt: ensures the committer's work
+     * directory exists, picks the default work file inside it, resolves the
+     * compression codec when output compression is enabled, and wraps an
+     * RCFile.Writer in a RecordWriter.
+     *
+     * @see org.apache.hadoop.mapreduce.lib.output.FileOutputFormat#getRecordWriter(org.apache.hadoop.mapreduce.TaskAttemptContext)
+     */
+    @Override
+    public org.apache.hadoop.mapreduce.RecordWriter<WritableComparable<?>, BytesRefArrayWritable> getRecordWriter(
+        TaskAttemptContext task) throws IOException, InterruptedException {
+
+        //FileOutputFormat.getWorkOutputPath takes TaskInputOutputContext instead of
+        //TaskAttemptContext, so can't use that here
+        FileOutputCommitter committer = (FileOutputCommitter) getOutputCommitter(task);
+        Path outputPath = committer.getWorkPath();
+
+        FileSystem fs = outputPath.getFileSystem(task.getConfiguration());
+
+        if (!fs.exists(outputPath)) {
+            fs.mkdirs(outputPath);
+        }
+
+        // Empty extension: the default work file name is used unchanged.
+        Path file = getDefaultWorkFile(task, "");
+
+        // codec stays null when output compression is not enabled.
+        CompressionCodec codec = null;
+        if (getCompressOutput(task)) {
+            Class<?> codecClass = getOutputCompressorClass(task, DefaultCodec.class);
+            codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, task.getConfiguration());
+        }
+
+        final RCFile.Writer out = new RCFile.Writer(fs, task.getConfiguration(), file, task, codec);
+
+        return new RecordWriter<WritableComparable<?>, BytesRefArrayWritable>() {
+
+            /**
+             * Appends the value to the RCFile; the key is not used.
+             *
+             * @see org.apache.hadoop.mapreduce.RecordWriter#write(java.lang.Object, java.lang.Object)
+             */
+            @Override
+            public void write(WritableComparable<?> key, BytesRefArrayWritable value)
+                throws IOException {
+                out.append(value);
+            }
+
+            /**
+             * Closes the underlying RCFile writer.
+             *
+             * @see org.apache.hadoop.mapreduce.RecordWriter#close(org.apache.hadoop.mapreduce.TaskAttemptContext)
+             */
+            @Override
+            public void close(TaskAttemptContext task) throws IOException, InterruptedException {
+                out.close();
+            }
+        };
+    }
+
+}
diff --git
hcatalog/core/src/main/java/org/apache/hive/hcatalog/rcfile/RCFileMapReduceRecordReader.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/rcfile/RCFileMapReduceRecordReader.java
new file mode 100644
index 0000000..d2a5175
--- /dev/null
+++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/rcfile/RCFileMapReduceRecordReader.java
@@ -0,0 +1,121 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hive.hcatalog.rcfile;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.io.RCFile;
+import org.apache.hadoop.hive.ql.io.RCFile.Reader;
+import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.lib.input.FileSplit;
+
+/**
+ * Record reader that walks the rows of one FileSplit of an RCFile, exposing
+ * a LongWritable key and a BytesRefArrayWritable value per row.
+ */
+public class RCFileMapReduceRecordReader
+    extends RecordReader {
+
+    // Underlying RCFile reader; assigned in initialize().
+    private Reader in;
+    // Reader position after the optional sync in initialize().
+    private long start;
+    // Split start plus split length.
+    private long end;
+    // False once the split is exhausted; never reset to true afterwards.
+    private boolean more = true;
+
+    // key and value objects are created once in initialize() and then reused
+    // for every getCurrentKey() and getCurrentValue() call. This is important
+    // since RCFile makes an assumption of this fact.
+
+    private LongWritable key;
+    private BytesRefArrayWritable value;
+
+    /** Closes the underlying reader; initialize() must have assigned it first. */
+    @Override
+    public void close() throws IOException {
+        in.close();
+    }
+
+    /** @return the shared key object (the same instance on every call) */
+    @Override
+    public LongWritable getCurrentKey() throws IOException, InterruptedException {
+        return key;
+    }
+
+    /** @return the shared value object (the same instance on every call) */
+    @Override
+    public BytesRefArrayWritable getCurrentValue() throws IOException, InterruptedException {
+        return value;
+    }
+
+    /**
+     * @return 0 when the split is empty, otherwise the fraction of the
+     *         [start, end) range covered by the reader position, capped at 1
+     */
+    @Override
+    public float getProgress() throws IOException, InterruptedException {
+        if (end == start) {
+            return 0.0f;
+        } else {
+            return Math.min(1.0f, (in.getPosition() - start) / (float) (end - start));
+        }
+    }
+
+    /**
+     * Advances to the next row and, when one is available, loads it into the
+     * shared value object.
+     *
+     * @return true if a row was read, false once the split is exhausted
+     */
+    @Override
+    public boolean nextKeyValue() throws IOException, InterruptedException {
+
+        more = next(key);
+        if (more) {
+            in.getCurrentRow(value);
+        }
+
+        return more;
+    }
+
+    /**
+     * Moves the reader forward by one row. Reports false when the reader is
+     * already exhausted, when the reader has no further row, or when the last
+     * seen sync position has reached the end of this split.
+     */
+    private boolean next(LongWritable key) throws IOException {
+        if (!more) {
+            return false;
+        }
+
+        more = in.next(key);
+        if (!more) {
+            return false;
+        }
+
+        // The row was read, but it lies at or past a sync point outside this split.
+        if (in.lastSeenSyncPos() >= end) {
+            more = false;
+            return more;
+        }
+        return more;
+    }
+
+    /**
+     * Opens the RCFile named by the split, positions the reader at the first
+     * sync point at or after the split start when the reader is not already
+     * past it, and allocates the reusable key and value objects.
+     *
+     * @param split must be a FileSplit (it is cast unconditionally)
+     * @param context supplies the configuration used to open the file
+     */
+    @Override
+    public void initialize(InputSplit split, TaskAttemptContext context) throws IOException,
+        InterruptedException {
+
+        FileSplit fSplit = (FileSplit) split;
+        Path path = fSplit.getPath();
+        Configuration conf = context.getConfiguration();
+        this.in = new RCFile.Reader(path.getFileSystem(conf), path, conf);
+        this.end = fSplit.getStart() + fSplit.getLength();
+
+        if (fSplit.getStart() > in.getPosition()) {
+            in.sync(fSplit.getStart());
+        }
+
+        // start is taken after the sync, so it can be larger than the split start.
+        this.start = in.getPosition();
+        more = start < end;
+
+        key = new LongWritable();
+        value = new BytesRefArrayWritable();
+    }
+}
diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/security/HdfsAuthorizationProvider.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/security/HdfsAuthorizationProvider.java
new file mode 100644
index 0000000..1139a53
--- /dev/null
+++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/security/HdfsAuthorizationProvider.java
@@ -0,0 +1,337 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hive.hcatalog.security;
+
+import static org.apache.hadoop.hive.metastore.MetaStoreUtils.DEFAULT_DATABASE_NAME;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.util.EnumSet;
+import java.util.List;
+
+import javax.security.auth.login.LoginException;
+
+import org.apache.commons.lang.ArrayUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsAction;
+import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.Warehouse;
+import org.apache.hadoop.hive.metastore.api.Database;
+import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.hive.ql.metadata.AuthorizationException;
+import org.apache.hadoop.hive.ql.metadata.Hive;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.Partition;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider;
+import org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProviderBase;
+import org.apache.hadoop.hive.ql.security.authorization.Privilege;
+import org.apache.hadoop.hive.shims.HadoopShims;
+import org.apache.hadoop.hive.shims.ShimLoader;
+import org.apache.hadoop.security.AccessControlException;
+import org.apache.hadoop.security.UserGroupInformation;
+
+/**
+ * An AuthorizationProvider, which checks against the data access level permissions on HDFS.
+ * It makes sense to eventually move this class to Hive, so that all hive users can
+ * use this authorization model.
+ *
+ * Every authorize overload resolves its argument to a filesystem path, maps
+ * the requested privileges to FsActions, and checks those actions against
+ * the permission bits of that path (or of its nearest existing ancestor).
+ */
+public class HdfsAuthorizationProvider extends HiveAuthorizationProviderBase {
+
+    /** Warehouse helper; (re)created on every setConf call. */
+    protected Warehouse wh;
+
+    //Config variables : create an enum to store them if we have more
+    private static final String PROXY_USER_NAME = "proxy.user.name";
+
+    /** Creates a provider without a configuration; setConf must be called before use. */
+    public HdfsAuthorizationProvider() {
+        super();
+    }
+
+    /** Creates a provider and immediately applies the given configuration. */
+    public HdfsAuthorizationProvider(Configuration conf) {
+        super();
+        setConf(conf);
+    }
+
+    /** Binds {@code hive_db} to a Hive instance built from the given configuration. */
+    @Override
+    public void init(Configuration conf) throws HiveException {
+        hive_db = new HiveProxy(Hive.get(new HiveConf(conf, HiveAuthorizationProvider.class)));
+    }
+
+    /**
+     * Stores the configuration and builds the Warehouse from it.
+     *
+     * @throws RuntimeException wrapping the MetaException if the Warehouse cannot be created
+     */
+    @Override
+    public void setConf(Configuration conf) {
+        super.setConf(conf);
+        try {
+            this.wh = new Warehouse(conf);
+        } catch (MetaException ex) {
+            throw new RuntimeException(ex);
+        }
+    }
+
+    /**
+     * Maps one Hive privilege to the filesystem action that represents it.
+     * SELECT and SHOW_DATABASE map to READ; ALTER_DATA, ALTER_METADATA,
+     * CREATE, DROP, INDEX and LOCK map to WRITE.
+     *
+     * @param priv the privilege to translate
+     * @param path not used by this implementation
+     * @throws AuthorizationException for ALL, UNKNOWN and any other privilege
+     */
+    protected FsAction getFsAction(Privilege priv, Path path) {
+
+        switch (priv.getPriv()) {
+        case ALL:
+            throw new AuthorizationException("no matching Action for Privilege.All");
+        case ALTER_DATA:
+            return FsAction.WRITE;
+        case ALTER_METADATA:
+            return FsAction.WRITE;
+        case CREATE:
+            return FsAction.WRITE;
+        case DROP:
+            return FsAction.WRITE;
+        case INDEX:
+            return FsAction.WRITE;
+        case LOCK:
+            return FsAction.WRITE;
+        case SELECT:
+            return FsAction.READ;
+        case SHOW_DATABASE:
+            return FsAction.READ;
+        case UNKNOWN:
+        default:
+            throw new AuthorizationException("Unknown privilege");
+        }
+    }
+
+    /**
+     * Translates an array of privileges into the set of distinct FsActions.
+     *
+     * @param privs privileges to translate; {@code null} yields an empty set
+     * @param path forwarded to {@link #getFsAction(Privilege, Path)}
+     * @return a new, possibly empty, mutable set
+     */
+    protected EnumSet<FsAction> getFsActions(Privilege[] privs, Path path) {
+        EnumSet<FsAction> actions = EnumSet.noneOf(FsAction.class);
+
+        if (privs == null) {
+            return actions;
+        }
+
+        for (Privilege priv : privs) {
+            actions.add(getFsAction(priv, path));
+        }
+
+        return actions;
+    }
+
+    private static final String DATABASE_WAREHOUSE_SUFFIX = ".db";
+
+    /**
+     * Returns the warehouse root for the default database, otherwise
+     * {@code <warehouse root>/<lower-cased name>.db}.
+     */
+    private Path getDefaultDatabasePath(String dbName) throws MetaException {
+        if (dbName.equalsIgnoreCase(DEFAULT_DATABASE_NAME)) {
+            return wh.getWhRoot();
+        }
+        return new Path(wh.getWhRoot(), dbName.toLowerCase() + DATABASE_WAREHOUSE_SUFFIX);
+    }
+
+    /**
+     * Resolves the path to authorize against for a database: the default
+     * warehouse path when the database has no location URI, otherwise the
+     * DNS-resolved database path.
+     *
+     * @throws HiveException carrying the MetaException message and cause
+     */
+    protected Path getDbLocation(Database db) throws HiveException {
+        try {
+            String location = db.getLocationUri();
+            if (location == null) {
+                return getDefaultDatabasePath(db.getName());
+            } else {
+                return wh.getDnsPath(wh.getDatabasePath(db));
+            }
+        } catch (MetaException ex) {
+            // Same message as before, but the cause is kept for diagnosis.
+            throw new HiveException(ex.getMessage(), ex);
+        }
+    }
+
+    /** Global check: authorizes against the warehouse root directory. */
+    @Override
+    public void authorize(Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv)
+        throws HiveException, AuthorizationException {
+        //Authorize for global level permissions at the warehouse dir
+        Path root;
+        try {
+            root = wh.getWhRoot();
+            authorize(root, readRequiredPriv, writeRequiredPriv);
+        } catch (MetaException ex) {
+            throw new HiveException(ex);
+        }
+    }
+
+    /** Database check: authorizes against the database location; a null database is a no-op. */
+    @Override
+    public void authorize(Database db, Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv)
+        throws HiveException, AuthorizationException {
+        if (db == null) {
+            return;
+        }
+
+        Path path = getDbLocation(db);
+
+        authorize(path, readRequiredPriv, writeRequiredPriv);
+    }
+
+    /**
+     * Table check: authorizes against the table's declared location, or the
+     * warehouse-derived table path when no location is declared. A null
+     * table is a no-op.
+     */
+    @Override
+    public void authorize(Table table, Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv)
+        throws HiveException, AuthorizationException {
+        if (table == null) {
+            return;
+        }
+
+        //unlike Hive's model, this can be called at CREATE TABLE as well, since we should authorize
+        //against the table's declared location
+        Path path = null;
+        try {
+            if (table.getTTable().getSd().getLocation() == null
+                || table.getTTable().getSd().getLocation().isEmpty()) {
+                path = wh.getTablePath(hive_db.getDatabase(table.getDbName()), table.getTableName());
+            } else {
+                path = table.getPath();
+            }
+        } catch (MetaException ex) {
+            throw new HiveException(ex);
+        }
+
+        authorize(path, readRequiredPriv, writeRequiredPriv);
+    }
+
+    /**
+     * Partition check with an explicit table: falls back to the table check
+     * when the partition is null or has no location, otherwise authorizes
+     * against the partition path.
+     */
+    //TODO: HiveAuthorizationProvider should expose this interface instead of #authorize(Partition, Privilege[], Privilege[])
+    public void authorize(Table table, Partition part, Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv)
+        throws HiveException, AuthorizationException {
+
+        if (part == null || part.getLocation() == null) {
+            authorize(table, readRequiredPriv, writeRequiredPriv);
+        } else {
+            authorize(part.getPartitionPath(), readRequiredPriv, writeRequiredPriv);
+        }
+    }
+
+    /** Partition check: a null partition is a no-op; otherwise uses the partition's own table. */
+    @Override
+    public void authorize(Partition part, Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv)
+        throws HiveException, AuthorizationException {
+        if (part == null) {
+            return;
+        }
+        authorize(part.getTable(), part, readRequiredPriv, writeRequiredPriv);
+    }
+
+    /** Column check: the column list is ignored and the partition-level check is applied. */
+    @Override
+    public void authorize(Table table, Partition part, List columns,
+                          Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) throws HiveException,
+        AuthorizationException {
+        //columns cannot live in different files, just check for partition level permissions
+        authorize(table, part, readRequiredPriv, writeRequiredPriv);
+    }
+
+    /**
+     * Authorization privileges against a path.
+     * Returns without checking anything when both privilege arrays map to no action.
+     * @param path a filesystem path
+     * @param readRequiredPriv a list of privileges needed for inputs.
+     * @param writeRequiredPriv a list of privileges needed for outputs.
+     * @throws AuthorizationException wrapping an AccessControlException or LoginException
+     * @throws HiveException wrapping any other IOException
+     */
+    public void authorize(Path path, Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv)
+        throws HiveException, AuthorizationException {
+        try {
+            EnumSet<FsAction> actions = getFsActions(readRequiredPriv, path);
+            actions.addAll(getFsActions(writeRequiredPriv, path));
+            if (actions.isEmpty()) {
+                return;
+            }
+
+            checkPermissions(getConf(), path, actions);
+
+        } catch (AccessControlException ex) {
+            throw new AuthorizationException(ex);
+        } catch (LoginException ex) {
+            throw new AuthorizationException(ex);
+        } catch (IOException ex) {
+            throw new HiveException(ex);
+        }
+    }
+
+    /**
+     * Checks the permissions for the given path and current user on Hadoop FS. If the given path
+     * does not exist, the nearest existing ancestor is checked instead.
+     *
+     * The user is the one named by the "proxy.user.name" property when it is
+     * set, otherwise the user resolved from the configuration by the shims.
+     *
+     * @throws IllegalArgumentException if {@code path} is null
+     */
+    protected static void checkPermissions(final Configuration conf, final Path path,
+                                           final EnumSet<FsAction> actions) throws IOException, LoginException {
+
+        if (path == null) {
+            throw new IllegalArgumentException("path is null");
+        }
+
+        HadoopShims shims = ShimLoader.getHadoopShims();
+        final UserGroupInformation ugi;
+        if (conf.get(PROXY_USER_NAME) != null) {
+            ugi = UserGroupInformation.createRemoteUser(conf.get(PROXY_USER_NAME));
+        } else {
+            ugi = shims.getUGIForConf(conf);
+        }
+        final String user = shims.getShortUserName(ugi);
+
+        final FileSystem fs = path.getFileSystem(conf);
+
+        if (fs.exists(path)) {
+            checkPermissions(fs, path, actions, user, ugi.getGroupNames());
+        } else if (path.getParent() != null) {
+            // find the ancestor which exists to check its permissions
+            Path par = path.getParent();
+            while (par != null) {
+                if (fs.exists(par)) {
+                    break;
+                }
+                par = par.getParent();
+            }
+
+            checkPermissions(fs, par, actions, user, ugi.getGroupNames());
+        }
+    }
+
+    /**
+     * Checks the permissions for the given path and current user on Hadoop FS. If the given path
+     * does not exist, it returns without checking anything.
+     *
+     * Each requested action passes when the owner bits (user is the owner),
+     * the group bits (file group is one of {@code groups}) or the other bits
+     * imply it. The three classes are tried in turn, so an owner whose user
+     * bits deny the action can still pass through the group or other bits.
+     *
+     * @throws AccessControlException for the first action that no class permits
+     */
+    @SuppressWarnings("deprecation")
+    protected static void checkPermissions(final FileSystem fs, final Path path,
+                                           final EnumSet<FsAction> actions, String user, String[] groups) throws IOException,
+        AccessControlException {
+
+        final FileStatus stat;
+
+        try {
+            stat = fs.getFileStatus(path);
+        } catch (FileNotFoundException fnfe) {
+            // File named by path doesn't exist; nothing to validate.
+            return;
+        } catch (org.apache.hadoop.fs.permission.AccessControlException ace) {
+            // Older hadoop version will throw this @deprecated Exception.
+            throw new AccessControlException(ace.getMessage());
+        }
+
+        final FsPermission dirPerms = stat.getPermission();
+        final String grp = stat.getGroup();
+
+        for (FsAction action : actions) {
+            if (user.equals(stat.getOwner())) {
+                if (dirPerms.getUserAction().implies(action)) {
+                    continue;
+                }
+            }
+            if (ArrayUtils.contains(groups, grp)) {
+                if (dirPerms.getGroupAction().implies(action)) {
+                    continue;
+                }
+            }
+            if (dirPerms.getOtherAction().implies(action)) {
+                continue;
+            }
+            throw new AccessControlException("action " + action + " not permitted on path "
+                + path + " for user " + user);
+        }
+    }
+}
diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/security/StorageDelegationAuthorizationProvider.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/security/StorageDelegationAuthorizationProvider.java
new file mode 100644
index 0000000..6c28803
--- /dev/null
+++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/security/StorageDelegationAuthorizationProvider.java
@@ -0,0 +1,142 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hive.hcatalog.security;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.api.Database;
+import org.apache.hadoop.hive.ql.metadata.AuthorizationException;
+import org.apache.hadoop.hive.ql.metadata.Hive;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler;
+import org.apache.hadoop.hive.ql.metadata.Partition;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.security.HiveAuthenticationProvider;
+import org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider;
+import org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProviderBase;
+import org.apache.hadoop.hive.ql.security.authorization.Privilege;
+import org.apache.hadoop.util.ReflectionUtils;
+import org.apache.hive.hcatalog.mapreduce.HCatStorageHandler;
+
+/**
+ * A HiveAuthorizationProvider which delegates the authorization requests to
+ * the underlying AuthorizationProviders obtained from the StorageHandler.
+ *
+ * Global and database checks always go to the HDFS authorizer. Table,
+ * partition and column checks go to the provider selected by
+ * {@link #getDelegate(Table)}.
+ */
+public class StorageDelegationAuthorizationProvider extends HiveAuthorizationProviderBase {
+
+    /** Authorizer used for global/database checks and for tables without a storage handler. */
+    protected HiveAuthorizationProvider hdfsAuthorizer = new HdfsAuthorizationProvider();
+
+    /** Storage handler class name mapped to authorization provider class name. */
+    protected static Map<String, String> authProviders = new HashMap<String, String>();
+
+    /** Applies the configuration to this provider and to the HDFS authorizer. */
+    @Override
+    public void setConf(Configuration conf) {
+        super.setConf(conf);
+        hdfsAuthorizer.setConf(conf);
+    }
+
+    /** Binds {@code hive_db} to a Hive instance built from the given configuration. */
+    @Override
+    public void init(Configuration conf) throws HiveException {
+        hive_db = new HiveProxy(Hive.get(new HiveConf(conf, HiveAuthorizationProvider.class)));
+    }
+
+    /** Applies the authenticator to this provider and to the HDFS authorizer. */
+    @Override
+    public void setAuthenticator(HiveAuthenticationProvider authenticator) {
+        super.setAuthenticator(authenticator);
+        hdfsAuthorizer.setAuthenticator(authenticator);
+    }
+
+    static {
+        registerAuthProvider("org.apache.hadoop.hive.hbase.HBaseStorageHandler",
+            "org.apache.hive.hcatalog.hbase.HBaseAuthorizationProvider");
+        registerAuthProvider("org.apache.hive.hcatalog.hbase.HBaseHCatStorageHandler",
+            "org.apache.hive.hcatalog.hbase.HBaseAuthorizationProvider");
+    }
+
+    //workaround until Hive adds StorageHandler.getAuthorizationProvider(). Remove these parts afterwards
+    /**
+     * Registers the authorization provider class to use for tables whose
+     * storage handler is not an HCatStorageHandler.
+     *
+     * @param storageHandlerClass canonical class name of the storage handler
+     * @param authProviderClass class name of the provider to instantiate for it
+     */
+    public static void registerAuthProvider(String storageHandlerClass,
+                                            String authProviderClass) {
+        authProviders.put(storageHandlerClass, authProviderClass);
+    }
+
+    /**
+     * Returns the authorization provider to delegate to for the given table:
+     * the HDFS authorizer when the table has no storage handler, the
+     * handler's own provider when it is an HCatStorageHandler, or a new
+     * instance of the provider class registered for the handler's class.
+     *
+     * @throws HiveException if the registered provider class cannot be found,
+     *         or if the handler is neither an HCatStorageHandler nor registered
+     */
+    protected HiveAuthorizationProvider getDelegate(Table table) throws HiveException {
+        HiveStorageHandler handler = table.getStorageHandler();
+
+        if (handler != null) {
+            if (handler instanceof HCatStorageHandler) {
+                return ((HCatStorageHandler) handler).getAuthorizationProvider();
+            } else {
+                String authProviderClass = authProviders.get(handler.getClass().getCanonicalName());
+
+                if (authProviderClass != null) {
+                    try {
+                        // Return the instance; previously it was created and then discarded,
+                        // so registered handlers always hit the exception below.
+                        return (HiveAuthorizationProvider) ReflectionUtils.newInstance(
+                            getConf().getClassByName(authProviderClass), getConf());
+                    } catch (ClassNotFoundException ex) {
+                        throw new HiveException("Cannot instantiate delegation AuthotizationProvider", ex);
+                    }
+                }
+
+                //else we do not have anything to delegate to
+                throw new HiveException(String.format("Storage Handler for table:%s is not an instance " +
+                    "of HCatStorageHandler", table.getTableName()));
+            }
+        } else {
+            //return an authorizer for HDFS
+            return hdfsAuthorizer;
+        }
+    }
+
+    /** Global check, delegated to the HDFS authorizer. */
+    @Override
+    public void authorize(Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv)
+        throws HiveException, AuthorizationException {
+        //global authorizations against warehouse hdfs directory
+        hdfsAuthorizer.authorize(readRequiredPriv, writeRequiredPriv);
+    }
+
+    /** Database check, delegated to the HDFS authorizer. */
+    @Override
+    public void authorize(Database db, Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv)
+        throws HiveException, AuthorizationException {
+        //db's are tied to a hdfs location
+        hdfsAuthorizer.authorize(db, readRequiredPriv, writeRequiredPriv);
+    }
+
+    /** Table check, delegated to the provider chosen for the table. */
+    @Override
+    public void authorize(Table table, Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv)
+        throws HiveException, AuthorizationException {
+        getDelegate(table).authorize(table, readRequiredPriv, writeRequiredPriv);
+    }
+
+    /** Partition check, delegated to the provider chosen for the partition's table. */
+    @Override
+    public void authorize(Partition part, Privilege[] readRequiredPriv,
+                          Privilege[] writeRequiredPriv) throws HiveException, AuthorizationException {
+        getDelegate(part.getTable()).authorize(part, readRequiredPriv, writeRequiredPriv);
+    }
+
+    /** Column check, delegated to the provider chosen for the table. */
+    @Override
+    public void authorize(Table table, Partition part, List columns,
+                          Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) throws HiveException,
+        AuthorizationException {
+        getDelegate(table).authorize(table, part, columns, readRequiredPriv, writeRequiredPriv);
+    }
+}
diff --git hcatalog/core/src/main/java/org/apache/hive/hcatalog/storagehandler/DummyHCatAuthProvider.java hcatalog/core/src/main/java/org/apache/hive/hcatalog/storagehandler/DummyHCatAuthProvider.java
new file mode 100644
index 0000000..5ced56c
--- /dev/null
+++ hcatalog/core/src/main/java/org/apache/hive/hcatalog/storagehandler/DummyHCatAuthProvider.java
@@ -0,0 +1,144 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hive.hcatalog.storagehandler;
+
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.metastore.api.Database;
+import org.apache.hadoop.hive.ql.metadata.AuthorizationException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.Partition;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.security.HiveAuthenticationProvider;
+import org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider;
+import org.apache.hadoop.hive.ql.security.authorization.Privilege;
+
+/**
+ * This class is a dummy implementation of HiveAuthorizationProvider to provide
+ * dummy authorization functionality for other classes to extend and override.
+ *
+ * Every method is a no-op: setters discard their argument, getters return
+ * null, and no authorize overload ever throws, so every request passes.
+ */
+class DummyHCatAuthProvider implements HiveAuthorizationProvider {
+
+    /** Always returns null; no configuration is retained. */
+    @Override
+    public Configuration getConf() {
+        return null;
+    }
+
+    /** Ignores the configuration. */
+    @Override
+    public void setConf(Configuration conf) {
+    }
+
+    /*
+     * (non-Javadoc)
+     *
+     * No-op initialization.
+     *
+     * @see
+     * org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider
+     * #init(org.apache.hadoop.conf.Configuration)
+     */
+    @Override
+    public void init(Configuration conf) throws HiveException {
+    }
+
+    /** Always returns null; no authenticator is retained. */
+    @Override
+    public HiveAuthenticationProvider getAuthenticator() {
+        return null;
+    }
+
+    /** Ignores the authenticator. */
+    @Override
+    public void setAuthenticator(HiveAuthenticationProvider authenticator) {
+    }
+
+    /*
+     * (non-Javadoc)
+     *
+     * Global check: performs no check and never throws.
+     *
+     * @see
+     * org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider
+     * #authorize(org.apache.hadoop.hive.ql.security.authorization.Privilege[],
+     * org.apache.hadoop.hive.ql.security.authorization.Privilege[])
+     */
+    @Override
+    public void authorize(Privilege[] readRequiredPriv,
+                          Privilege[] writeRequiredPriv) throws HiveException,
+        AuthorizationException {
+    }
+
+    /*
+     * (non-Javadoc)
+     *
+     * Database check: performs no check and never throws.
+     *
+     * @see
+     * org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider
+     * #authorize(org.apache.hadoop.hive.metastore.api.Database,
+     * org.apache.hadoop.hive.ql.security.authorization.Privilege[],
+     * org.apache.hadoop.hive.ql.security.authorization.Privilege[])
+     */
+    @Override
+    public void authorize(Database db, Privilege[] readRequiredPriv,
+                          Privilege[] writeRequiredPriv) throws HiveException,
+        AuthorizationException {
+    }
+
+    /*
+     * (non-Javadoc)
+     *
+     * Table check: performs no check and never throws.
+     *
+     * @see
+     * org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider
+     * #authorize(org.apache.hadoop.hive.ql.metadata.Table,
+     * org.apache.hadoop.hive.ql.security.authorization.Privilege[],
+     * org.apache.hadoop.hive.ql.security.authorization.Privilege[])
+     */
+    @Override
+    public void authorize(Table table, Privilege[] readRequiredPriv,
+                          Privilege[] writeRequiredPriv) throws HiveException,
+        AuthorizationException {
+    }
+
+    /*
+     * (non-Javadoc)
+     *
+     * Partition check: performs no check and never throws.
+     *
+     * @see
+     * org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider
+     * #authorize(org.apache.hadoop.hive.ql.metadata.Partition,
+     * org.apache.hadoop.hive.ql.security.authorization.Privilege[],
+     * org.apache.hadoop.hive.ql.security.authorization.Privilege[])
+     */
+    @Override
+    public void authorize(Partition part, Privilege[] readRequiredPriv,
+                          Privilege[] writeRequiredPriv) throws HiveException,
+        AuthorizationException {
+    }
+
+    /*
+     * (non-Javadoc)
+     *
+     * Column check: performs no check and never throws.
+     *
+     * @see
+     * org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider
+     * #authorize(org.apache.hadoop.hive.ql.metadata.Table,
+     * org.apache.hadoop.hive.ql.metadata.Partition, java.util.List,
+     * org.apache.hadoop.hive.ql.security.authorization.Privilege[],
+     * org.apache.hadoop.hive.ql.security.authorization.Privilege[])
+     */
+    @Override
+    public void authorize(Table table, Partition part, List columns,
+                          Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv)
+        throws HiveException, AuthorizationException {
+    }
+
+}
diff --git hcatalog/core/src/test/java/org/apache/hcatalog/ExitException.java
hcatalog/core/src/test/java/org/apache/hcatalog/ExitException.java deleted file mode 100644 index 1fd6a85..0000000 --- hcatalog/core/src/test/java/org/apache/hcatalog/ExitException.java +++ /dev/null @@ -1,38 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog; - -public class ExitException extends SecurityException { - private static final long serialVersionUID = -1982617086752946683L; - private final int status; - - /** - * @return the status - */ - public int getStatus() { - return status; - } - - public ExitException(int status) { - - super("Raising exception, instead of System.exit(). Return code was: " + status); - this.status = status; - } -} diff --git hcatalog/core/src/test/java/org/apache/hcatalog/HcatTestUtils.java hcatalog/core/src/test/java/org/apache/hcatalog/HcatTestUtils.java deleted file mode 100644 index 8bcac97..0000000 --- hcatalog/core/src/test/java/org/apache/hcatalog/HcatTestUtils.java +++ /dev/null @@ -1,100 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog; - -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; - -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.permission.FsPermission; -import org.apache.hadoop.hive.metastore.Warehouse; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; -import org.apache.hadoop.hive.ql.metadata.Hive; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Utility methods for tests - */ -public class HcatTestUtils { - private static final Logger LOG = LoggerFactory.getLogger(HcatTestUtils.class); - - public static FsPermission perm007 = FsPermission.createImmutable((short) 0007); // -------rwx - public static FsPermission perm070 = FsPermission.createImmutable((short) 0070); // ----rwx--- - public static FsPermission perm700 = FsPermission.createImmutable((short) 0700); // -rwx------ - public static FsPermission perm755 = FsPermission.createImmutable((short) 0755); // -rwxr-xr-x - public static FsPermission perm777 = FsPermission.createImmutable((short) 0777); // -rwxrwxrwx - public static FsPermission perm300 = FsPermission.createImmutable((short) 0300); // --wx------ - public static FsPermission perm500 = 
FsPermission.createImmutable((short) 0500); // -r-x------ - public static FsPermission perm555 = FsPermission.createImmutable((short) 0555); // -r-xr-xr-x - - /** - * Returns the database path. - */ - public static Path getDbPath(Hive hive, Warehouse wh, String dbName) throws MetaException, HiveException { - return wh.getDatabasePath(hive.getDatabase(dbName)); - } - - /** - * Removes all databases and tables from the metastore - */ - public static void cleanupHMS(Hive hive, Warehouse wh, FsPermission defaultPerm) - throws HiveException, MetaException, NoSuchObjectException { - for (String dbName : hive.getAllDatabases()) { - if (dbName.equals("default")) { - continue; - } - try { - Path path = getDbPath(hive, wh, dbName); - FileSystem whFs = path.getFileSystem(hive.getConf()); - whFs.setPermission(path, defaultPerm); - } catch (IOException ex) { - //ignore - } - hive.dropDatabase(dbName, true, true, true); - } - - //clean tables in default db - for (String tablename : hive.getAllTables("default")) { - hive.dropTable("default", tablename, true, true); - } - } - - public static void createTestDataFile(String filename, String[] lines) throws IOException { - FileWriter writer = null; - try { - File file = new File(filename); - file.deleteOnExit(); - writer = new FileWriter(file); - for (String line : lines) { - writer.write(line + "\n"); - } - } finally { - if (writer != null) { - writer.close(); - } - } - - } -} diff --git hcatalog/core/src/test/java/org/apache/hcatalog/MiniCluster.java hcatalog/core/src/test/java/org/apache/hcatalog/MiniCluster.java deleted file mode 100644 index 134e509..0000000 --- hcatalog/core/src/test/java/org/apache/hcatalog/MiniCluster.java +++ /dev/null @@ -1,201 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog; - -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.OutputStreamWriter; -import java.io.PrintWriter; -import java.util.Iterator; -import java.util.Map; -import java.util.Properties; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hdfs.MiniDFSCluster; -import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapred.MiniMRCluster; - -/** - * This class builds a single instance of itself with the Singleton - * design pattern. While building the single instance, it sets up a - * mini cluster that actually consists of a mini DFS cluster and a - * mini MapReduce cluster on the local machine and also sets up the - * environment for Pig to run on top of the mini cluster. 
- */ -public class MiniCluster { - private MiniDFSCluster m_dfs = null; - private MiniMRCluster m_mr = null; - private FileSystem m_fileSys = null; - private JobConf m_conf = null; - - private final static MiniCluster INSTANCE = new MiniCluster(); - private static boolean isSetup = true; - - private MiniCluster() { - setupMiniDfsAndMrClusters(); - } - - private void setupMiniDfsAndMrClusters() { - try { - final int dataNodes = 1; // There will be 4 data nodes - final int taskTrackers = 1; // There will be 4 task tracker nodes - Configuration config = new Configuration(); - - // Builds and starts the mini dfs and mapreduce clusters - System.setProperty("hadoop.log.dir", "."); - m_dfs = new MiniDFSCluster(config, dataNodes, true, null); - - m_fileSys = m_dfs.getFileSystem(); - m_mr = new MiniMRCluster(taskTrackers, m_fileSys.getUri().toString(), 1); - - // Create the configuration hadoop-site.xml file - File conf_dir = new File(System.getProperty("user.home"), "pigtest/conf/"); - conf_dir.mkdirs(); - File conf_file = new File(conf_dir, "hadoop-site.xml"); - - // Write the necessary config info to hadoop-site.xml - m_conf = m_mr.createJobConf(); - m_conf.setInt("mapred.submit.replication", 1); - m_conf.set("dfs.datanode.address", "0.0.0.0:0"); - m_conf.set("dfs.datanode.http.address", "0.0.0.0:0"); - m_conf.writeXml(new FileOutputStream(conf_file)); - - // Set the system properties needed by Pig - System.setProperty("cluster", m_conf.get("mapred.job.tracker")); - System.setProperty("namenode", m_conf.get("fs.default.name")); - System.setProperty("junit.hadoop.conf", conf_dir.getPath()); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - /** - * Returns the single instance of class MiniClusterBuilder that - * represents the resouces for a mini dfs cluster and a mini - * mapreduce cluster. 
- */ - public static MiniCluster buildCluster() { - if (!isSetup) { - INSTANCE.setupMiniDfsAndMrClusters(); - isSetup = true; - } - return INSTANCE; - } - - public void shutDown() { - INSTANCE.shutdownMiniDfsAndMrClusters(); - } - - @Override - protected void finalize() { - shutdownMiniDfsAndMrClusters(); - } - - private void shutdownMiniDfsAndMrClusters() { - isSetup = false; - try { - if (m_fileSys != null) { - m_fileSys.close(); - } - } catch (IOException e) { - e.printStackTrace(); - } - if (m_dfs != null) { - m_dfs.shutdown(); - } - if (m_mr != null) { - m_mr.shutdown(); - } - m_fileSys = null; - m_dfs = null; - m_mr = null; - } - - public Properties getProperties() { - errorIfNotSetup(); - Properties properties = new Properties(); - assert m_conf != null; - Iterator> iter = m_conf.iterator(); - while (iter.hasNext()) { - Map.Entry entry = iter.next(); - properties.put(entry.getKey(), entry.getValue()); - } - return properties; - } - - public void setProperty(String name, String value) { - errorIfNotSetup(); - m_conf.set(name, value); - } - - public FileSystem getFileSystem() { - errorIfNotSetup(); - return m_fileSys; - } - - /** - * Throw RunTimeException if isSetup is false - */ - private void errorIfNotSetup() { - if (isSetup) { - return; - } - String msg = "function called on MiniCluster that has been shutdown"; - throw new RuntimeException(msg); - } - - static public void createInputFile(MiniCluster miniCluster, String fileName, - String[] inputData) - throws IOException { - FileSystem fs = miniCluster.getFileSystem(); - createInputFile(fs, fileName, inputData); - } - - static public void createInputFile(FileSystem fs, String fileName, - String[] inputData) throws IOException { - Path path = new Path(fileName); - if (fs.exists(path)) { - throw new IOException("File " + fileName + " already exists on the minicluster"); - } - FSDataOutputStream stream = fs.create(path); - PrintWriter pw = new PrintWriter(new OutputStreamWriter(stream, "UTF-8")); - for (int 
i = 0; i < inputData.length; i++) { - pw.println(inputData[i]); - } - pw.close(); - - } - - /** - * Helper to remove a dfs file from the minicluster DFS - * - * @param miniCluster reference to the Minicluster where the file should be deleted - * @param fileName pathname of the file to be deleted - * @throws IOException - */ - static public void deleteFile(MiniCluster miniCluster, String fileName) - throws IOException { - FileSystem fs = miniCluster.getFileSystem(); - fs.delete(new Path(fileName), true); - } -} diff --git hcatalog/core/src/test/java/org/apache/hcatalog/NoExitSecurityManager.java hcatalog/core/src/test/java/org/apache/hcatalog/NoExitSecurityManager.java deleted file mode 100644 index bc6c490..0000000 --- hcatalog/core/src/test/java/org/apache/hcatalog/NoExitSecurityManager.java +++ /dev/null @@ -1,42 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog; - -import java.security.Permission; - -public class NoExitSecurityManager extends SecurityManager { - - @Override - public void checkPermission(Permission perm) { - // allow anything. - } - - @Override - public void checkPermission(Permission perm, Object context) { - // allow anything. 
- } - - @Override - public void checkExit(int status) { - - super.checkExit(status); - throw new ExitException(status); - } -} diff --git hcatalog/core/src/test/java/org/apache/hcatalog/cli/DummyStorageHandler.java hcatalog/core/src/test/java/org/apache/hcatalog/cli/DummyStorageHandler.java deleted file mode 100644 index ab784ec..0000000 --- hcatalog/core/src/test/java/org/apache/hcatalog/cli/DummyStorageHandler.java +++ /dev/null @@ -1,289 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.hcatalog.cli; - -import java.io.IOException; -import java.util.List; -import java.util.Map; -import java.util.Properties; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.metastore.HiveMetaHook; -import org.apache.hadoop.hive.metastore.api.Database; -import org.apache.hadoop.hive.ql.io.HiveOutputFormat; -import org.apache.hadoop.hive.ql.metadata.AuthorizationException; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.metadata.Partition; -import org.apache.hadoop.hive.ql.plan.TableDesc; -import org.apache.hadoop.hive.ql.security.HiveAuthenticationProvider; -import org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider; -import org.apache.hadoop.hive.ql.security.authorization.Privilege; -import org.apache.hadoop.hive.serde2.SerDe; -import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe; -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.mapred.InputFormat; -import org.apache.hadoop.mapred.InputSplit; -import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapred.OutputFormat; -import org.apache.hadoop.mapred.RecordReader; -import org.apache.hadoop.mapred.RecordWriter; -import org.apache.hadoop.mapred.Reporter; -import org.apache.hadoop.util.Progressable; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.hcatalog.mapreduce.HCatStorageHandler; - -class DummyStorageHandler extends HCatStorageHandler { - - @Override - public Configuration getConf() { - return null; - } - - @Override - public void setConf(Configuration conf) { - } - - @Override - public Class getInputFormatClass() { - return DummyInputFormat.class; - } - - @Override - public Class getOutputFormatClass() { - return DummyOutputFormat.class; - } - - @Override - public Class getSerDeClass() { - return ColumnarSerDe.class; - } 
- - @Override - public HiveMetaHook getMetaHook() { - return null; - } - - @Override - public void configureInputJobProperties(TableDesc tableDesc, Map jobProperties) { - } - - @Override - public void configureOutputJobProperties(TableDesc tableDesc, Map jobProperties) { - } - - @Override - public HiveAuthorizationProvider getAuthorizationProvider() - throws HiveException { - return new DummyAuthProvider(); - } - - private class DummyAuthProvider implements HiveAuthorizationProvider { - - @Override - public Configuration getConf() { - return null; - } - - /* @param conf - * @see org.apache.hadoop.conf.Configurable#setConf(org.apache.hadoop.conf.Configuration) - */ - @Override - public void setConf(Configuration conf) { - } - - /* @param conf - /* @throws HiveException - * @see org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider#init(org.apache.hadoop.conf.Configuration) - */ - @Override - public void init(Configuration conf) throws HiveException { - } - - /* @return HiveAuthenticationProvider - * @see org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider#getAuthenticator() - */ - @Override - public HiveAuthenticationProvider getAuthenticator() { - return null; - } - - /* @param authenticator - * @see org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider#setAuthenticator(org.apache.hadoop.hive.ql.security.HiveAuthenticationProvider) - */ - @Override - public void setAuthenticator(HiveAuthenticationProvider authenticator) { - } - - /* @param readRequiredPriv - /* @param writeRequiredPriv - /* @throws HiveException - /* @throws AuthorizationException - * @see org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider#authorize(org.apache.hadoop.hive.ql.security.authorization.Privilege[], org.apache.hadoop.hive.ql.security.authorization.Privilege[]) - */ - @Override - public void authorize(Privilege[] readRequiredPriv, - Privilege[] writeRequiredPriv) throws HiveException, - 
AuthorizationException { - } - - /* @param db - /* @param readRequiredPriv - /* @param writeRequiredPriv - /* @throws HiveException - /* @throws AuthorizationException - * @see org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider#authorize(org.apache.hadoop.hive.metastore.api.Database, org.apache.hadoop.hive.ql.security.authorization.Privilege[], org.apache.hadoop.hive.ql.security.authorization.Privilege[]) - */ - @Override - public void authorize(Database db, Privilege[] readRequiredPriv, - Privilege[] writeRequiredPriv) throws HiveException, - AuthorizationException { - } - - /* @param table - /* @param readRequiredPriv - /* @param writeRequiredPriv - /* @throws HiveException - /* @throws AuthorizationException - * @see org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider#authorize(org.apache.hadoop.hive.ql.metadata.Table, org.apache.hadoop.hive.ql.security.authorization.Privilege[], org.apache.hadoop.hive.ql.security.authorization.Privilege[]) - */ - @Override - public void authorize(org.apache.hadoop.hive.ql.metadata.Table table, Privilege[] readRequiredPriv, - Privilege[] writeRequiredPriv) throws HiveException, - AuthorizationException { - } - - /* @param part - /* @param readRequiredPriv - /* @param writeRequiredPriv - /* @throws HiveException - /* @throws AuthorizationException - * @see org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider#authorize(org.apache.hadoop.hive.ql.metadata.Partition, org.apache.hadoop.hive.ql.security.authorization.Privilege[], org.apache.hadoop.hive.ql.security.authorization.Privilege[]) - */ - @Override - public void authorize(Partition part, Privilege[] readRequiredPriv, - Privilege[] writeRequiredPriv) throws HiveException, - AuthorizationException { - } - - /* @param table - /* @param part - /* @param columns - /* @param readRequiredPriv - /* @param writeRequiredPriv - /* @throws HiveException - /* @throws AuthorizationException - * @see 
org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider#authorize(org.apache.hadoop.hive.ql.metadata.Table, org.apache.hadoop.hive.ql.metadata.Partition, java.util.List, org.apache.hadoop.hive.ql.security.authorization.Privilege[], org.apache.hadoop.hive.ql.security.authorization.Privilege[]) - */ - @Override - public void authorize(org.apache.hadoop.hive.ql.metadata.Table table, Partition part, List columns, - Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) - throws HiveException, AuthorizationException { - } - - } - - /** - * The Class DummyInputFormat is a dummy implementation of the old hadoop - * mapred.InputFormat required by HiveStorageHandler. - */ - class DummyInputFormat implements - InputFormat { - - /* - * @see - * org.apache.hadoop.mapred.InputFormat#getRecordReader(org.apache.hadoop - * .mapred.InputSplit, org.apache.hadoop.mapred.JobConf, - * org.apache.hadoop.mapred.Reporter) - */ - @Override - public RecordReader getRecordReader( - InputSplit split, JobConf jobconf, Reporter reporter) - throws IOException { - throw new IOException("This operation is not supported."); - } - - /* - * @see - * org.apache.hadoop.mapred.InputFormat#getSplits(org.apache.hadoop. - * mapred .JobConf, int) - */ - @Override - public InputSplit[] getSplits(JobConf jobconf, int number) - throws IOException { - throw new IOException("This operation is not supported."); - } - } - - /** - * The Class DummyOutputFormat is a dummy implementation of the old hadoop - * mapred.OutputFormat and HiveOutputFormat required by HiveStorageHandler. 
- */ - class DummyOutputFormat implements - OutputFormat, HCatRecord>, - HiveOutputFormat, HCatRecord> { - - /* - * @see - * org.apache.hadoop.mapred.OutputFormat#checkOutputSpecs(org.apache - * .hadoop .fs.FileSystem, org.apache.hadoop.mapred.JobConf) - */ - @Override - public void checkOutputSpecs(FileSystem fs, JobConf jobconf) - throws IOException { - throw new IOException("This operation is not supported."); - - } - - /* - * @see - * org.apache.hadoop.mapred.OutputFormat#getRecordWriter(org.apache. - * hadoop .fs.FileSystem, org.apache.hadoop.mapred.JobConf, - * java.lang.String, org.apache.hadoop.util.Progressable) - */ - @Override - public RecordWriter, HCatRecord> getRecordWriter( - FileSystem fs, JobConf jobconf, String str, - Progressable progress) throws IOException { - throw new IOException("This operation is not supported."); - } - - /* - * @see - * org.apache.hadoop.hive.ql.io.HiveOutputFormat#getHiveRecordWriter(org - * .apache.hadoop.mapred.JobConf, org.apache.hadoop.fs.Path, - * java.lang.Class, boolean, java.util.Properties, - * org.apache.hadoop.util.Progressable) - */ - @Override - public org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter getHiveRecordWriter( - JobConf jc, Path finalOutPath, - Class valueClass, boolean isCompressed, - Properties tableProperties, Progressable progress) - throws IOException { - throw new IOException("This operation is not supported."); - } - - } - -} - - diff --git hcatalog/core/src/test/java/org/apache/hcatalog/cli/TestEximSemanticAnalysis.java.broken hcatalog/core/src/test/java/org/apache/hcatalog/cli/TestEximSemanticAnalysis.java.broken deleted file mode 100644 index 506a40e..0000000 --- hcatalog/core/src/test/java/org/apache/hcatalog/cli/TestEximSemanticAnalysis.java.broken +++ /dev/null @@ -1,175 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.cli; - -import java.io.IOException; -import java.net.URI; - -import junit.framework.TestCase; - -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.permission.FsPermission; -import org.apache.hadoop.hive.cli.CliSessionState; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.apache.hadoop.hive.metastore.Warehouse; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.ql.metadata.Hive; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; -import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hcatalog.MiniCluster; -import org.apache.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzer; -import org.apache.hcatalog.common.HCatConstants; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - - -public class TestEximSemanticAnalysis extends TestCase { - - private final MiniCluster cluster = MiniCluster.buildCluster(); - private HiveConf hcatConf; - private HCatDriver hcatDriver; - private Warehouse wh; - private static final Logger LOG = LoggerFactory.getLogger(TestEximSemanticAnalysis.class); - - @Override - protected void setUp() throws 
Exception { - - hcatConf = new HiveConf(this.getClass()); - hcatConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); - hcatConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); - hcatConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); - hcatConf.set(ConfVars.SEMANTIC_ANALYZER_HOOK.varname, HCatSemanticAnalyzer.class.getName()); - hcatConf.set("fs.pfile.impl", "org.apache.hadoop.fs.ProxyLocalFileSystem"); - URI fsuri = cluster.getFileSystem().getUri(); - Path whPath = new Path(fsuri.getScheme(), fsuri.getAuthority(), "/user/hive/warehouse"); - hcatConf.set(HiveConf.ConfVars.HADOOPFS.varname, fsuri.toString()); - hcatConf.set(ConfVars.METASTOREWAREHOUSE.varname, whPath.toString()); - wh = new Warehouse(hcatConf); - SessionState.start(new CliSessionState(hcatConf)); - - hcatDriver = new HCatDriver(); - } - - @Override - protected void tearDown() throws Exception { - } - - public void testExportPerms() throws IOException, MetaException, HiveException { - - hcatDriver.run("drop table junit_sem_analysis"); - CommandProcessorResponse response = hcatDriver - .run("create table junit_sem_analysis (a int) partitioned by (b string) stored as RCFILE"); - assertEquals(0, response.getResponseCode()); - Path whPath = wh.getTablePath(Hive.get(hcatConf).getDatabase("default"), "junit_sem_analysis"); - cluster.getFileSystem().setPermission(whPath, FsPermission.valueOf("-rwxrwx-wx")); - cluster.getFileSystem().setOwner(whPath, "nosuchuser", "nosuchgroup"); - - Runtime.getRuntime().exec("rm -rf /tmp/hcat"); - response = hcatDriver - .run("export table junit_sem_analysis to 'pfile://local:9080/tmp/hcat/exports/junit_sem_analysis'"); - - assertEquals(10, response.getResponseCode()); - assertTrue("Permission denied expected : "+response.getErrorMessage(), - response.getErrorMessage().startsWith( - "FAILED: Error in semantic analysis: org.apache.hcatalog.common.HCatException : 3000 : Permission denied")); - Runtime.getRuntime().exec("rm -rf /tmp/hcat"); - response = 
hcatDriver.run("drop table junit_sem_analysis"); - if (response.getResponseCode() != 0) { - LOG.error(response.getErrorMessage()); - fail("Drop table failed"); - } - } - - public void testImportPerms() throws IOException, MetaException, HiveException { - - hcatDriver.run("drop table junit_sem_analysis"); - CommandProcessorResponse response = hcatDriver - .run("create table junit_sem_analysis (a int) partitioned by (b string) stored as RCFILE"); - assertEquals(0, response.getResponseCode()); - Runtime.getRuntime().exec("rm -rf /tmp/hcat"); - response = hcatDriver - .run("export table junit_sem_analysis to 'pfile://local:9080/tmp/hcat/exports/junit_sem_analysis'"); - assertEquals(0, response.getResponseCode()); - response = hcatDriver.run("drop table junit_sem_analysis"); - assertEquals(0, response.getResponseCode()); - response = hcatDriver - .run("create table junit_sem_analysis (a int) partitioned by (b string) stored as RCFILE"); - assertEquals(0, response.getResponseCode()); - Path whPath = wh.getTablePath(Hive.get(hcatConf).getDatabase("default"), "junit_sem_analysis"); - cluster.getFileSystem().setPermission(whPath, FsPermission.valueOf("-rwxrwxr-x")); - cluster.getFileSystem().setOwner(whPath, "nosuchuser", "nosuchgroup"); - - response = hcatDriver - .run("import table junit_sem_analysis from 'pfile://local:9080/tmp/hcat/exports/junit_sem_analysis'"); - - assertEquals(10, response.getResponseCode()); - assertTrue( - "Permission denied expected: "+response.getErrorMessage() , - response.getErrorMessage().startsWith( - "FAILED: Error in semantic analysis: org.apache.hcatalog.common.HCatException : 3000 : Permission denied")); - Runtime.getRuntime().exec("rm -rf /tmp/hcat"); - - cluster.getFileSystem().setPermission(whPath, FsPermission.valueOf("-rwxrwxrwx")); - response = hcatDriver.run("drop table junit_sem_analysis"); - if (response.getResponseCode() != 0) { - LOG.error(response.getErrorMessage()); - fail("Drop table failed"); - } - } - - public void 
testImportSetPermsGroup() throws IOException, MetaException, HiveException { - - hcatDriver.run("drop table junit_sem_analysis"); - hcatDriver.run("drop table junit_sem_analysis_imported"); - CommandProcessorResponse response = hcatDriver - .run("create table junit_sem_analysis (a int) partitioned by (b string) stored as RCFILE"); - assertEquals(0, response.getResponseCode()); - Runtime.getRuntime().exec("rm -rf /tmp/hcat"); - response = hcatDriver - .run("export table junit_sem_analysis to 'pfile://local:9080/tmp/hcat/exports/junit_sem_analysis'"); - assertEquals(0, response.getResponseCode()); - response = hcatDriver.run("drop table junit_sem_analysis"); - assertEquals(0, response.getResponseCode()); - - hcatConf.set(HCatConstants.HCAT_PERMS, "-rwxrw-r--"); - hcatConf.set(HCatConstants.HCAT_GROUP, "nosuchgroup"); - - response = hcatDriver - .run("import table junit_sem_analysis_imported from 'pfile://local:9080/tmp/hcat/exports/junit_sem_analysis'"); - assertEquals(0, response.getResponseCode()); - - Path whPath = wh.getTablePath(Hive.get(hcatConf).getDatabase("default"), "junit_sem_analysis_imported"); - assertEquals(FsPermission.valueOf("-rwxrw-r--"), cluster.getFileSystem().getFileStatus(whPath).getPermission()); - assertEquals("nosuchgroup", cluster.getFileSystem().getFileStatus(whPath).getGroup()); - - Runtime.getRuntime().exec("rm -rf /tmp/hcat"); - - response = hcatDriver.run("drop table junit_sem_analysis_imported"); - if (response.getResponseCode() != 0) { - LOG.error(response.getErrorMessage()); - fail("Drop table failed"); - } - } - - -} - diff --git hcatalog/core/src/test/java/org/apache/hcatalog/cli/TestPermsGrp.java hcatalog/core/src/test/java/org/apache/hcatalog/cli/TestPermsGrp.java deleted file mode 100644 index a750d27..0000000 --- hcatalog/core/src/test/java/org/apache/hcatalog/cli/TestPermsGrp.java +++ /dev/null @@ -1,232 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.cli; - -import java.io.FileNotFoundException; -import java.util.ArrayList; - -import junit.framework.TestCase; - -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.permission.FsPermission; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; -import org.apache.hadoop.hive.metastore.MetaStoreUtils; -import org.apache.hadoop.hive.metastore.Warehouse; -import org.apache.hadoop.hive.metastore.api.AlreadyExistsException; -import org.apache.hadoop.hive.metastore.api.Database; -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.metastore.api.InvalidObjectException; -import org.apache.hadoop.hive.metastore.api.InvalidOperationException; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; -import org.apache.hadoop.hive.metastore.api.SerDeInfo; -import org.apache.hadoop.hive.metastore.api.StorageDescriptor; -import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.hadoop.hive.metastore.api.Type; -import org.apache.hadoop.hive.ql.metadata.Hive; -import org.apache.hadoop.hive.serde.serdeConstants; -import org.apache.hadoop.hive.shims.ShimLoader; -import 
org.apache.hcatalog.ExitException; -import org.apache.hcatalog.NoExitSecurityManager; - -import org.apache.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzer; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.thrift.TException; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TestPermsGrp extends TestCase { - - private boolean isServerRunning = false; - private static final int msPort = 20101; - private HiveConf hcatConf; - private Warehouse clientWH; - private HiveMetaStoreClient msc; - private static final Logger LOG = LoggerFactory.getLogger(TestPermsGrp.class); - - @Override - protected void tearDown() throws Exception { - System.setSecurityManager(securityManager); - } - - @Override - protected void setUp() throws Exception { - - if (isServerRunning) { - return; - } - - MetaStoreUtils.startMetaStore(msPort, ShimLoader.getHadoopThriftAuthBridge()); - - isServerRunning = true; - - securityManager = System.getSecurityManager(); - System.setSecurityManager(new NoExitSecurityManager()); - - hcatConf = new HiveConf(this.getClass()); - hcatConf.set("hive.metastore.local", "false"); - hcatConf.setVar(HiveConf.ConfVars.METASTOREURIS, "thrift://127.0.0.1:" + msPort); - hcatConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES, 3); - hcatConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTFAILURERETRIES, 3); - - hcatConf.set(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname, HCatSemanticAnalyzer.class.getName()); - hcatConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); - hcatConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); - hcatConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); - hcatConf.set(HiveConf.ConfVars.METASTORE_CLIENT_SOCKET_TIMEOUT.varname, "60"); - clientWH = new Warehouse(hcatConf); - msc = new HiveMetaStoreClient(hcatConf, null); - System.setProperty(HiveConf.ConfVars.PREEXECHOOKS.varname, " "); - System.setProperty(HiveConf.ConfVars.POSTEXECHOOKS.varname, " "); - } - - - 
public void testCustomPerms() throws Exception { - - String dbName = MetaStoreUtils.DEFAULT_DATABASE_NAME; - String tblName = "simptbl"; - String typeName = "Person"; - - try { - - // Lets first test for default permissions, this is the case when user specified nothing. - Table tbl = getTable(dbName, tblName, typeName); - msc.createTable(tbl); - Database db = Hive.get(hcatConf).getDatabase(dbName); - Path dfsPath = clientWH.getTablePath(db, tblName); - cleanupTbl(dbName, tblName, typeName); - - // Next user did specify perms. - try { - HCatCli.main(new String[]{"-e", "create table simptbl (name string) stored as RCFILE", "-p", "rwx-wx---"}); - } catch (Exception e) { - assertTrue(e instanceof ExitException); - assertEquals(((ExitException) e).getStatus(), 0); - } - dfsPath = clientWH.getTablePath(db, tblName); - assertTrue(dfsPath.getFileSystem(hcatConf).getFileStatus(dfsPath).getPermission().equals(FsPermission.valueOf("drwx-wx---"))); - - cleanupTbl(dbName, tblName, typeName); - - // User specified perms in invalid format. - hcatConf.set(HCatConstants.HCAT_PERMS, "rwx"); - // make sure create table fails. - try { - HCatCli.main(new String[]{"-e", "create table simptbl (name string) stored as RCFILE", "-p", "rwx"}); - assert false; - } catch (Exception me) { - assertTrue(me instanceof ExitException); - } - // No physical dir gets created. - dfsPath = clientWH.getTablePath(db, tblName); - try { - dfsPath.getFileSystem(hcatConf).getFileStatus(dfsPath); - assert false; - } catch (Exception fnfe) { - assertTrue(fnfe instanceof FileNotFoundException); - } - - // And no metadata gets created. 
- try { - msc.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, tblName); - assert false; - } catch (Exception e) { - assertTrue(e instanceof NoSuchObjectException); - assertEquals("default.simptbl table not found", e.getMessage()); - } - - // test for invalid group name - hcatConf.set(HCatConstants.HCAT_PERMS, "drw-rw-rw-"); - hcatConf.set(HCatConstants.HCAT_GROUP, "THIS_CANNOT_BE_A_VALID_GRP_NAME_EVER"); - - try { - // create table must fail. - HCatCli.main(new String[]{"-e", "create table simptbl (name string) stored as RCFILE", "-p", "rw-rw-rw-", "-g", "THIS_CANNOT_BE_A_VALID_GRP_NAME_EVER"}); - assert false; - } catch (Exception me) { - assertTrue(me instanceof SecurityException); - } - - try { - // no metadata should get created. - msc.getTable(dbName, tblName); - assert false; - } catch (Exception e) { - assertTrue(e instanceof NoSuchObjectException); - assertEquals("default.simptbl table not found", e.getMessage()); - } - try { - // neither dir should get created. - dfsPath.getFileSystem(hcatConf).getFileStatus(dfsPath); - assert false; - } catch (Exception e) { - assertTrue(e instanceof FileNotFoundException); - } - - } catch (Exception e) { - LOG.error("testCustomPerms failed.", e); - throw e; - } - } - - private void silentDropDatabase(String dbName) throws MetaException, TException { - try { - for (String tableName : msc.getTables(dbName, "*")) { - msc.dropTable(dbName, tableName); - } - - } catch (NoSuchObjectException e) { - } - } - - private void cleanupTbl(String dbName, String tblName, String typeName) throws NoSuchObjectException, MetaException, TException, InvalidOperationException { - - msc.dropTable(dbName, tblName); - msc.dropType(typeName); - } - - private Table getTable(String dbName, String tblName, String typeName) throws NoSuchObjectException, MetaException, TException, AlreadyExistsException, InvalidObjectException { - - msc.dropTable(dbName, tblName); - silentDropDatabase(dbName); - - - msc.dropType(typeName); - Type typ1 = new Type(); - 
typ1.setName(typeName); - typ1.setFields(new ArrayList(1)); - typ1.getFields().add(new FieldSchema("name", serdeConstants.STRING_TYPE_NAME, "")); - msc.createType(typ1); - - Table tbl = new Table(); - tbl.setDbName(dbName); - tbl.setTableName(tblName); - StorageDescriptor sd = new StorageDescriptor(); - tbl.setSd(sd); - sd.setCols(typ1.getFields()); - - sd.setSerdeInfo(new SerDeInfo()); - return tbl; - } - - - private SecurityManager securityManager; - -} diff --git hcatalog/core/src/test/java/org/apache/hcatalog/cli/TestSemanticAnalysis.java hcatalog/core/src/test/java/org/apache/hcatalog/cli/TestSemanticAnalysis.java deleted file mode 100644 index d7a2b68..0000000 --- hcatalog/core/src/test/java/org/apache/hcatalog/cli/TestSemanticAnalysis.java +++ /dev/null @@ -1,421 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.hcatalog.cli; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertTrue; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - -import org.apache.hadoop.hive.cli.CliSessionState; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.MetaStoreUtils; -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; -import org.apache.hadoop.hive.metastore.api.Partition; -import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.hadoop.hive.ql.CommandNeedRetryException; -import org.apache.hadoop.hive.ql.Driver; -import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat; -import org.apache.hadoop.hive.ql.io.RCFileInputFormat; -import org.apache.hadoop.hive.ql.io.RCFileOutputFormat; -import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; -import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hadoop.mapred.TextInputFormat; -import org.apache.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzer; -import org.apache.hcatalog.mapreduce.HCatBaseTest; -import org.apache.thrift.TException; -import org.junit.Before; -import org.junit.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TestSemanticAnalysis extends HCatBaseTest { - - private static final Logger LOG = LoggerFactory.getLogger(TestSemanticAnalysis.class); - private static final String TBL_NAME = "junit_sem_analysis"; - - private Driver hcatDriver = null; - private String query; - - @Before - public void setUpHCatDriver() throws IOException { - if (hcatDriver == null) { - HiveConf hcatConf = new HiveConf(hiveConf); - hcatConf.set(HiveConf.ConfVars.HIVEDEFAULTRCFILESERDE.varname, - 
"org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe"); - hcatConf.set(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname, - HCatSemanticAnalyzer.class.getName()); - hcatDriver = new Driver(hcatConf); - SessionState.start(new CliSessionState(hcatConf)); - } - } - - @Test - public void testDescDB() throws CommandNeedRetryException, IOException { - hcatDriver.run("drop database mydb cascade"); - assertEquals(0, hcatDriver.run("create database mydb").getResponseCode()); - CommandProcessorResponse resp = hcatDriver.run("describe database mydb"); - assertEquals(0, resp.getResponseCode()); - ArrayList result = new ArrayList(); - hcatDriver.getResults(result); - assertTrue(result.get(0).contains("mydb.db")); - hcatDriver.run("drop database mydb cascade"); - } - - @Test - public void testCreateTblWithLowerCasePartNames() throws CommandNeedRetryException, MetaException, TException, NoSuchObjectException { - driver.run("drop table junit_sem_analysis"); - CommandProcessorResponse resp = driver.run("create table junit_sem_analysis (a int) partitioned by (B string) stored as TEXTFILE"); - assertEquals(resp.getResponseCode(), 0); - assertEquals(null, resp.getErrorMessage()); - Table tbl = client.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, TBL_NAME); - assertEquals("Partition key name case problem", "b", tbl.getPartitionKeys().get(0).getName()); - driver.run("drop table junit_sem_analysis"); - } - - @Test - public void testAlterTblFFpart() throws MetaException, TException, NoSuchObjectException, CommandNeedRetryException { - - driver.run("drop table junit_sem_analysis"); - driver.run("create table junit_sem_analysis (a int) partitioned by (b string) stored as TEXTFILE"); - driver.run("alter table junit_sem_analysis add partition (b='2010-10-10')"); - hcatDriver.run("alter table junit_sem_analysis partition (b='2010-10-10') set fileformat RCFILE"); - - Table tbl = client.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, TBL_NAME); - assertEquals(TextInputFormat.class.getName(), 
tbl.getSd().getInputFormat()); - assertEquals(HiveIgnoreKeyTextOutputFormat.class.getName(), tbl.getSd().getOutputFormat()); - - List partVals = new ArrayList(1); - partVals.add("2010-10-10"); - Partition part = client.getPartition(MetaStoreUtils.DEFAULT_DATABASE_NAME, TBL_NAME, partVals); - - assertEquals(RCFileInputFormat.class.getName(), part.getSd().getInputFormat()); - assertEquals(RCFileOutputFormat.class.getName(), part.getSd().getOutputFormat()); - - hcatDriver.run("drop table junit_sem_analysis"); - } - - @Test - public void testUsNonExistentDB() throws CommandNeedRetryException { - CommandProcessorResponse resp = hcatDriver.run("use no_such_db"); - assertEquals(1, resp.getResponseCode()); - } - - @Test - public void testDatabaseOperations() throws MetaException, CommandNeedRetryException { - - List dbs = client.getAllDatabases(); - String testDb1 = "testdatabaseoperatons1"; - String testDb2 = "testdatabaseoperatons2"; - - if (dbs.contains(testDb1.toLowerCase())) { - assertEquals(0, hcatDriver.run("drop database " + testDb1).getResponseCode()); - } - - if (dbs.contains(testDb2.toLowerCase())) { - assertEquals(0, hcatDriver.run("drop database " + testDb2).getResponseCode()); - } - - assertEquals(0, hcatDriver.run("create database " + testDb1).getResponseCode()); - assertTrue(client.getAllDatabases().contains(testDb1)); - assertEquals(0, hcatDriver.run("create database if not exists " + testDb1).getResponseCode()); - assertTrue(client.getAllDatabases().contains(testDb1)); - assertEquals(0, hcatDriver.run("create database if not exists " + testDb2).getResponseCode()); - assertTrue(client.getAllDatabases().contains(testDb2)); - - assertEquals(0, hcatDriver.run("drop database " + testDb1).getResponseCode()); - assertEquals(0, hcatDriver.run("drop database " + testDb2).getResponseCode()); - assertFalse(client.getAllDatabases().contains(testDb1)); - assertFalse(client.getAllDatabases().contains(testDb2)); - } - - @Test - public void testCreateTableIfNotExists() 
throws MetaException, TException, NoSuchObjectException, CommandNeedRetryException { - - hcatDriver.run("drop table " + TBL_NAME); - hcatDriver.run("create table junit_sem_analysis (a int) stored as RCFILE"); - Table tbl = client.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, TBL_NAME); - List cols = tbl.getSd().getCols(); - assertEquals(1, cols.size()); - assertTrue(cols.get(0).equals(new FieldSchema("a", "int", null))); - assertEquals(RCFileInputFormat.class.getName(), tbl.getSd().getInputFormat()); - assertEquals(RCFileOutputFormat.class.getName(), tbl.getSd().getOutputFormat()); - - CommandProcessorResponse resp = hcatDriver.run("create table if not exists junit_sem_analysis (a int) stored as RCFILE"); - assertEquals(0, resp.getResponseCode()); - assertNull(resp.getErrorMessage()); - tbl = client.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, TBL_NAME); - cols = tbl.getSd().getCols(); - assertEquals(1, cols.size()); - assertTrue(cols.get(0).equals(new FieldSchema("a", "int", null))); - assertEquals(RCFileInputFormat.class.getName(), tbl.getSd().getInputFormat()); - assertEquals(RCFileOutputFormat.class.getName(), tbl.getSd().getOutputFormat()); - - hcatDriver.run("drop table junit_sem_analysis"); - } - - @Test - public void testAlterTblTouch() throws CommandNeedRetryException { - - hcatDriver.run("drop table junit_sem_analysis"); - hcatDriver.run("create table junit_sem_analysis (a int) partitioned by (b string) stored as RCFILE"); - CommandProcessorResponse response = hcatDriver.run("alter table junit_sem_analysis touch"); - assertEquals(0, response.getResponseCode()); - - hcatDriver.run("alter table junit_sem_analysis touch partition (b='12')"); - assertEquals(0, response.getResponseCode()); - - hcatDriver.run("drop table junit_sem_analysis"); - } - - @Test - public void testChangeColumns() throws CommandNeedRetryException { - hcatDriver.run("drop table junit_sem_analysis"); - hcatDriver.run("create table junit_sem_analysis (a int, c string) partitioned by (b 
string) stored as RCFILE"); - CommandProcessorResponse response = hcatDriver.run("alter table junit_sem_analysis change a a1 int"); - assertEquals(0, response.getResponseCode()); - - response = hcatDriver.run("alter table junit_sem_analysis change a1 a string"); - assertEquals(0, response.getResponseCode()); - - response = hcatDriver.run("alter table junit_sem_analysis change a a int after c"); - assertEquals(0, response.getResponseCode()); - hcatDriver.run("drop table junit_sem_analysis"); - } - - @Test - public void testAddReplaceCols() throws IOException, MetaException, TException, NoSuchObjectException, CommandNeedRetryException { - - hcatDriver.run("drop table junit_sem_analysis"); - hcatDriver.run("create table junit_sem_analysis (a int, c string) partitioned by (b string) stored as RCFILE"); - CommandProcessorResponse response = hcatDriver.run("alter table junit_sem_analysis replace columns (a1 tinyint)"); - assertEquals(0, response.getResponseCode()); - - response = hcatDriver.run("alter table junit_sem_analysis add columns (d tinyint)"); - assertEquals(0, response.getResponseCode()); - assertNull(response.getErrorMessage()); - - response = hcatDriver.run("describe extended junit_sem_analysis"); - assertEquals(0, response.getResponseCode()); - Table tbl = client.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, TBL_NAME); - List cols = tbl.getSd().getCols(); - assertEquals(2, cols.size()); - assertTrue(cols.get(0).equals(new FieldSchema("a1", "tinyint", null))); - assertTrue(cols.get(1).equals(new FieldSchema("d", "tinyint", null))); - hcatDriver.run("drop table junit_sem_analysis"); - } - - @Test - public void testAlterTblClusteredBy() throws CommandNeedRetryException { - - hcatDriver.run("drop table junit_sem_analysis"); - hcatDriver.run("create table junit_sem_analysis (a int) partitioned by (b string) stored as RCFILE"); - CommandProcessorResponse response = hcatDriver.run("alter table junit_sem_analysis clustered by (a) into 7 buckets"); - assertEquals(0, 
response.getResponseCode()); - hcatDriver.run("drop table junit_sem_analysis"); - } - - @Test - public void testAlterTableSetFF() throws IOException, MetaException, TException, NoSuchObjectException, CommandNeedRetryException { - - hcatDriver.run("drop table junit_sem_analysis"); - hcatDriver.run("create table junit_sem_analysis (a int) partitioned by (b string) stored as RCFILE"); - - Table tbl = client.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, TBL_NAME); - assertEquals(RCFileInputFormat.class.getName(), tbl.getSd().getInputFormat()); - assertEquals(RCFileOutputFormat.class.getName(), tbl.getSd().getOutputFormat()); - - hcatDriver.run("alter table junit_sem_analysis set fileformat INPUTFORMAT 'org.apache.hadoop.hive.ql.io.RCFileInputFormat' OUTPUTFORMAT " + - "'org.apache.hadoop.hive.ql.io.RCFileOutputFormat' inputdriver 'mydriver' outputdriver 'yourdriver'"); - hcatDriver.run("desc extended junit_sem_analysis"); - - tbl = client.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, TBL_NAME); - assertEquals(RCFileInputFormat.class.getName(), tbl.getSd().getInputFormat()); - assertEquals(RCFileOutputFormat.class.getName(), tbl.getSd().getOutputFormat()); - - hcatDriver.run("drop table junit_sem_analysis"); - } - - @Test - public void testAddPartFail() throws CommandNeedRetryException { - - driver.run("drop table junit_sem_analysis"); - driver.run("create table junit_sem_analysis (a int) partitioned by (b string) stored as RCFILE"); - CommandProcessorResponse response = hcatDriver.run("alter table junit_sem_analysis add partition (b='2') location 'README.txt'"); - assertEquals(0, response.getResponseCode()); - driver.run("drop table junit_sem_analysis"); - } - - @Test - public void testAddPartPass() throws IOException, CommandNeedRetryException { - - hcatDriver.run("drop table junit_sem_analysis"); - hcatDriver.run("create table junit_sem_analysis (a int) partitioned by (b string) stored as RCFILE"); - CommandProcessorResponse response = hcatDriver.run("alter table 
junit_sem_analysis add partition (b='2') location '" + TEST_DATA_DIR + "'"); - assertEquals(0, response.getResponseCode()); - assertNull(response.getErrorMessage()); - hcatDriver.run("drop table junit_sem_analysis"); - } - - @Test - public void testCTAS() throws CommandNeedRetryException { - hcatDriver.run("drop table junit_sem_analysis"); - query = "create table junit_sem_analysis (a int) as select * from tbl2"; - CommandProcessorResponse response = hcatDriver.run(query); - assertEquals(40000, response.getResponseCode()); - assertTrue(response.getErrorMessage().contains("FAILED: SemanticException Operation not supported. Create table as Select is not a valid operation.")); - hcatDriver.run("drop table junit_sem_analysis"); - } - - @Test - public void testStoredAs() throws CommandNeedRetryException { - hcatDriver.run("drop table junit_sem_analysis"); - query = "create table junit_sem_analysis (a int)"; - CommandProcessorResponse response = hcatDriver.run(query); - assertEquals(0, response.getResponseCode()); - hcatDriver.run("drop table junit_sem_analysis"); - } - - @Test - public void testAddDriverInfo() throws IOException, MetaException, TException, NoSuchObjectException, CommandNeedRetryException { - - hcatDriver.run("drop table junit_sem_analysis"); - query = "create table junit_sem_analysis (a int) partitioned by (b string) stored as " + - "INPUTFORMAT 'org.apache.hadoop.hive.ql.io.RCFileInputFormat' OUTPUTFORMAT " + - "'org.apache.hadoop.hive.ql.io.RCFileOutputFormat' inputdriver 'mydriver' outputdriver 'yourdriver' "; - assertEquals(0, hcatDriver.run(query).getResponseCode()); - - Table tbl = client.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, TBL_NAME); - assertEquals(RCFileInputFormat.class.getName(), tbl.getSd().getInputFormat()); - assertEquals(RCFileOutputFormat.class.getName(), tbl.getSd().getOutputFormat()); - - hcatDriver.run("drop table junit_sem_analysis"); - } - - @Test - public void testInvalidateNonStringPartition() throws IOException, 
CommandNeedRetryException { - - hcatDriver.run("drop table junit_sem_analysis"); - query = "create table junit_sem_analysis (a int) partitioned by (b int) stored as RCFILE"; - - CommandProcessorResponse response = hcatDriver.run(query); - assertEquals(40000, response.getResponseCode()); - assertEquals("FAILED: SemanticException Operation not supported. HCatalog only supports partition columns of type string. For column: b Found type: int", - response.getErrorMessage()); - - } - - @Test - public void testInvalidateSeqFileStoredAs() throws IOException, CommandNeedRetryException { - - hcatDriver.run("drop table junit_sem_analysis"); - query = "create table junit_sem_analysis (a int) partitioned by (b string) stored as SEQUENCEFILE"; - - CommandProcessorResponse response = hcatDriver.run(query); - assertEquals(0, response.getResponseCode()); - - } - - @Test - public void testInvalidateTextFileStoredAs() throws IOException, CommandNeedRetryException { - - hcatDriver.run("drop table junit_sem_analysis"); - query = "create table junit_sem_analysis (a int) partitioned by (b string) stored as TEXTFILE"; - - CommandProcessorResponse response = hcatDriver.run(query); - assertEquals(0, response.getResponseCode()); - - } - - @Test - public void testInvalidateClusteredBy() throws IOException, CommandNeedRetryException { - - hcatDriver.run("drop table junit_sem_analysis"); - query = "create table junit_sem_analysis (a int) partitioned by (b string) clustered by (a) into 10 buckets stored as TEXTFILE"; - - CommandProcessorResponse response = hcatDriver.run(query); - assertEquals(0, response.getResponseCode()); - } - - @Test - public void testCTLFail() throws IOException, CommandNeedRetryException { - - driver.run("drop table junit_sem_analysis"); - driver.run("drop table like_table"); - query = "create table junit_sem_analysis (a int) partitioned by (b string) stored as RCFILE"; - - driver.run(query); - query = "create table like_table like junit_sem_analysis"; - 
CommandProcessorResponse response = hcatDriver.run(query); - assertEquals(0, response.getResponseCode()); - } - - @Test - public void testCTLPass() throws IOException, MetaException, TException, NoSuchObjectException, CommandNeedRetryException { - - try { - hcatDriver.run("drop table junit_sem_analysis"); - } catch (Exception e) { - LOG.error("Error in drop table.", e); - } - query = "create table junit_sem_analysis (a int) partitioned by (b string) stored as RCFILE"; - - hcatDriver.run(query); - String likeTbl = "like_table"; - hcatDriver.run("drop table " + likeTbl); - query = "create table like_table like junit_sem_analysis"; - CommandProcessorResponse resp = hcatDriver.run(query); - assertEquals(0, resp.getResponseCode()); -// Table tbl = client.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, likeTbl); -// assertEquals(likeTbl,tbl.getTableName()); -// List cols = tbl.getSd().getCols(); -// assertEquals(1, cols.size()); -// assertEquals(new FieldSchema("a", "int", null), cols.get(0)); -// assertEquals("org.apache.hadoop.hive.ql.io.RCFileInputFormat",tbl.getSd().getInputFormat()); -// assertEquals("org.apache.hadoop.hive.ql.io.RCFileOutputFormat",tbl.getSd().getOutputFormat()); -// Map tblParams = tbl.getParameters(); -// assertEquals("org.apache.hadoop.hive.hcat.rcfile.RCFileInputStorageDriver", tblParams.get("hcat.isd")); -// assertEquals("org.apache.hadoop.hive.hcat.rcfile.RCFileOutputStorageDriver", tblParams.get("hcat.osd")); -// -// hcatDriver.run("drop table junit_sem_analysis"); -// hcatDriver.run("drop table "+likeTbl); - } - -// This test case currently fails, since add partitions don't inherit anything from tables. 
- -// public void testAddPartInheritDrivers() throws MetaException, TException, NoSuchObjectException{ -// -// hcatDriver.run("drop table "+TBL_NAME); -// hcatDriver.run("create table junit_sem_analysis (a int) partitioned by (b string) stored as RCFILE"); -// hcatDriver.run("alter table "+TBL_NAME+" add partition (b='2010-10-10')"); -// -// List partVals = new ArrayList(1); -// partVals.add("2010-10-10"); -// -// Map map = client.getPartition(MetaStoreUtils.DEFAULT_DATABASE_NAME, TBL_NAME, partVals).getParameters(); -// assertEquals(map.get(InitializeInput.HOWL_ISD_CLASS), RCFileInputStorageDriver.class.getName()); -// assertEquals(map.get(InitializeInput.HOWL_OSD_CLASS), RCFileOutputStorageDriver.class.getName()); -// } -} diff --git hcatalog/core/src/test/java/org/apache/hcatalog/cli/TestStorageHandlerProperties.java.broken hcatalog/core/src/test/java/org/apache/hcatalog/cli/TestStorageHandlerProperties.java.broken deleted file mode 100644 index 7612337..0000000 --- hcatalog/core/src/test/java/org/apache/hcatalog/cli/TestStorageHandlerProperties.java.broken +++ /dev/null @@ -1,86 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.cli; - -import static org.junit.Assert.assertEquals; - -import org.apache.hadoop.hive.cli.CliSessionState; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; -import org.apache.hadoop.hive.metastore.MetaStoreUtils; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; -import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.hadoop.hive.ql.CommandNeedRetryException; -import org.apache.hadoop.hive.ql.Driver; -import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; -import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzer; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.thrift.TException; - -import junit.framework.TestCase; - -public class TestStorageHandlerProperties extends TestCase { - - private Driver hcatDriver; - private Driver hiveDriver; - private HiveMetaStoreClient msc; - - protected void setUp() throws Exception { - HiveConf hcatConf = new HiveConf(this.getClass()); - hcatConf.set(ConfVars.PREEXECHOOKS.varname, ""); - hcatConf.set(ConfVars.POSTEXECHOOKS.varname, ""); - hcatConf.set(ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); - - HiveConf hiveConf = new HiveConf(hcatConf,this.getClass()); - hiveDriver = new Driver(hiveConf); - - hcatConf.set(ConfVars.SEMANTIC_ANALYZER_HOOK.varname, HCatSemanticAnalyzer.class.getName()); - hcatDriver = new Driver(hcatConf); - - msc = new HiveMetaStoreClient(hcatConf); - SessionState.start(new CliSessionState(hcatConf)); - } - - public void testTableProperties() throws CommandNeedRetryException, MetaException ,TException, NoSuchObjectException{ - hcatDriver.run("drop table test_table"); - CommandProcessorResponse response = hcatDriver - .run("create table test_table(key int, value 
string) STORED BY " + - "'org.apache.hcatalog.cli.DummyStorageHandler' "); - - assertEquals(0, response.getResponseCode()); - Table tbl = msc.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, "test_table"); - DummyStorageHandler dsh = new DummyStorageHandler(); - assertTrue(tbl.getParameters().containsKey(HCatConstants.HCAT_ISD_CLASS)); - assertTrue(tbl.getParameters().containsKey(HCatConstants.HCAT_OSD_CLASS)); - assertEquals(tbl.getParameters().get(HCatConstants.HCAT_ISD_CLASS), dsh.getInputStorageDriver().getName()); - assertEquals(tbl.getParameters().get(HCatConstants.HCAT_OSD_CLASS), dsh.getOutputStorageDriver().getName()); - } - - /* @throws java.lang.Exception - * @see junit.framework.TestCase#tearDown() - */ - protected void tearDown() throws Exception { - super.tearDown(); - } - -} diff --git hcatalog/core/src/test/java/org/apache/hcatalog/cli/TestUseDatabase.java hcatalog/core/src/test/java/org/apache/hcatalog/cli/TestUseDatabase.java deleted file mode 100644 index 1112749..0000000 --- hcatalog/core/src/test/java/org/apache/hcatalog/cli/TestUseDatabase.java +++ /dev/null @@ -1,77 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.hcatalog.cli; - -import java.io.IOException; - -import junit.framework.TestCase; - -import org.apache.hadoop.hive.cli.CliSessionState; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.apache.hadoop.hive.ql.CommandNeedRetryException; -import org.apache.hadoop.hive.ql.Driver; -import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; -import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzer; - -/* Unit test for GitHub Howl issue #3 */ -public class TestUseDatabase extends TestCase { - - private Driver hcatDriver; - - @Override - protected void setUp() throws Exception { - - HiveConf hcatConf = new HiveConf(this.getClass()); - hcatConf.set(ConfVars.PREEXECHOOKS.varname, ""); - hcatConf.set(ConfVars.POSTEXECHOOKS.varname, ""); - hcatConf.set(ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); - - hcatConf.set(ConfVars.SEMANTIC_ANALYZER_HOOK.varname, HCatSemanticAnalyzer.class.getName()); - hcatDriver = new Driver(hcatConf); - SessionState.start(new CliSessionState(hcatConf)); - } - - String query; - private final String dbName = "testUseDatabase_db"; - private final String tblName = "testUseDatabase_tbl"; - - public void testAlterTablePass() throws IOException, CommandNeedRetryException { - - hcatDriver.run("create database " + dbName); - hcatDriver.run("use " + dbName); - hcatDriver.run("create table " + tblName + " (a int) partitioned by (b string) stored as RCFILE"); - - CommandProcessorResponse response; - - response = hcatDriver.run("alter table " + tblName + " add partition (b='2') location '/tmp'"); - assertEquals(0, response.getResponseCode()); - assertNull(response.getErrorMessage()); - - response = hcatDriver.run("alter table " + tblName + " set fileformat INPUTFORMAT 'org.apache.hadoop.hive.ql.io.RCFileInputFormat' OUTPUTFORMAT " + - "'org.apache.hadoop.hive.ql.io.RCFileOutputFormat' 
inputdriver 'mydriver' outputdriver 'yourdriver'"); - assertEquals(0, response.getResponseCode()); - assertNull(response.getErrorMessage()); - - hcatDriver.run("drop table " + tblName); - hcatDriver.run("drop database " + dbName); - } - -} diff --git hcatalog/core/src/test/java/org/apache/hcatalog/common/TestHCatUtil.java hcatalog/core/src/test/java/org/apache/hcatalog/common/TestHCatUtil.java deleted file mode 100644 index 3027d38..0000000 --- hcatalog/core/src/test/java/org/apache/hcatalog/common/TestHCatUtil.java +++ /dev/null @@ -1,183 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.hcatalog.common; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; -import org.apache.hadoop.fs.permission.FsAction; -import org.apache.hadoop.fs.permission.FsPermission; -import org.apache.hadoop.hive.metastore.TableType; -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.metastore.api.Order; -import org.apache.hadoop.hive.metastore.api.SerDeInfo; -import org.apache.hadoop.hive.metastore.api.StorageDescriptor; -import org.apache.hadoop.hive.ql.metadata.Table; -import org.apache.hadoop.hive.serde.serdeConstants; -import org.apache.hcatalog.data.schema.HCatFieldSchema; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.junit.Assert; -import org.junit.Test; - -public class TestHCatUtil { - - @Test - public void testFsPermissionOperation() { - - HashMap permsCode = new HashMap(); - - for (int i = 0; i < 8; i++) { - for (int j = 0; j < 8; j++) { - for (int k = 0; k < 8; k++) { - StringBuilder sb = new StringBuilder(); - sb.append("0"); - sb.append(i); - sb.append(j); - sb.append(k); - Integer code = (((i * 8) + j) * 8) + k; - String perms = (new FsPermission(Short.decode(sb.toString()))).toString(); - if (permsCode.containsKey(perms)) { - Assert.assertEquals("permissions(" + perms + ") mapped to multiple codes", code, permsCode.get(perms)); - } - permsCode.put(perms, code); - assertFsPermissionTransformationIsGood(perms); - } - } - } - } - - private void assertFsPermissionTransformationIsGood(String perms) { - Assert.assertEquals(perms, FsPermission.valueOf("-" + perms).toString()); - } - - @Test - public void testValidateMorePermissive() { - assertConsistentFsPermissionBehaviour(FsAction.ALL, true, true, true, true, true, true, true, true); - assertConsistentFsPermissionBehaviour(FsAction.READ, false, true, false, true, false, 
false, false, false); - assertConsistentFsPermissionBehaviour(FsAction.WRITE, false, true, false, false, true, false, false, false); - assertConsistentFsPermissionBehaviour(FsAction.EXECUTE, false, true, true, false, false, false, false, false); - assertConsistentFsPermissionBehaviour(FsAction.READ_EXECUTE, false, true, true, true, false, true, false, false); - assertConsistentFsPermissionBehaviour(FsAction.READ_WRITE, false, true, false, true, true, false, true, false); - assertConsistentFsPermissionBehaviour(FsAction.WRITE_EXECUTE, false, true, true, false, true, false, false, true); - assertConsistentFsPermissionBehaviour(FsAction.NONE, false, true, false, false, false, false, false, false); - } - - - private void assertConsistentFsPermissionBehaviour( - FsAction base, boolean versusAll, boolean versusNone, - boolean versusX, boolean versusR, boolean versusW, - boolean versusRX, boolean versusRW, boolean versusWX) { - - Assert.assertTrue(versusAll == HCatUtil.validateMorePermissive(base, FsAction.ALL)); - Assert.assertTrue(versusX == HCatUtil.validateMorePermissive(base, FsAction.EXECUTE)); - Assert.assertTrue(versusNone == HCatUtil.validateMorePermissive(base, FsAction.NONE)); - Assert.assertTrue(versusR == HCatUtil.validateMorePermissive(base, FsAction.READ)); - Assert.assertTrue(versusRX == HCatUtil.validateMorePermissive(base, FsAction.READ_EXECUTE)); - Assert.assertTrue(versusRW == HCatUtil.validateMorePermissive(base, FsAction.READ_WRITE)); - Assert.assertTrue(versusW == HCatUtil.validateMorePermissive(base, FsAction.WRITE)); - Assert.assertTrue(versusWX == HCatUtil.validateMorePermissive(base, FsAction.WRITE_EXECUTE)); - } - - @Test - public void testExecutePermissionsCheck() { - Assert.assertTrue(HCatUtil.validateExecuteBitPresentIfReadOrWrite(FsAction.ALL)); - Assert.assertTrue(HCatUtil.validateExecuteBitPresentIfReadOrWrite(FsAction.NONE)); - Assert.assertTrue(HCatUtil.validateExecuteBitPresentIfReadOrWrite(FsAction.EXECUTE)); - 
Assert.assertTrue(HCatUtil.validateExecuteBitPresentIfReadOrWrite(FsAction.READ_EXECUTE)); - Assert.assertTrue(HCatUtil.validateExecuteBitPresentIfReadOrWrite(FsAction.WRITE_EXECUTE)); - - Assert.assertFalse(HCatUtil.validateExecuteBitPresentIfReadOrWrite(FsAction.READ)); - Assert.assertFalse(HCatUtil.validateExecuteBitPresentIfReadOrWrite(FsAction.WRITE)); - Assert.assertFalse(HCatUtil.validateExecuteBitPresentIfReadOrWrite(FsAction.READ_WRITE)); - - } - - @Test - public void testGetTableSchemaWithPtnColsApi() throws IOException { - // Check the schema of a table with one field & no partition keys. - StorageDescriptor sd = new StorageDescriptor( - Lists.newArrayList(new FieldSchema("username", serdeConstants.STRING_TYPE_NAME, null)), - "location", "org.apache.hadoop.mapred.TextInputFormat", - "org.apache.hadoop.mapred.TextOutputFormat", false, -1, new SerDeInfo(), - new ArrayList(), new ArrayList(), new HashMap()); - org.apache.hadoop.hive.metastore.api.Table apiTable = - new org.apache.hadoop.hive.metastore.api.Table("test_tblname", "test_dbname", "test_owner", - 0, 0, 0, sd, new ArrayList(), new HashMap(), - "viewOriginalText", "viewExpandedText", TableType.EXTERNAL_TABLE.name()); - Table table = new Table(apiTable); - - List expectedHCatSchema = - Lists.newArrayList(new HCatFieldSchema("username", HCatFieldSchema.Type.STRING, null)); - - Assert.assertEquals(new HCatSchema(expectedHCatSchema), - HCatUtil.getTableSchemaWithPtnCols(table)); - - // Add a partition key & ensure its reflected in the schema. - List partitionKeys = - Lists.newArrayList(new FieldSchema("dt", serdeConstants.STRING_TYPE_NAME, null)); - table.getTTable().setPartitionKeys(partitionKeys); - expectedHCatSchema.add(new HCatFieldSchema("dt", HCatFieldSchema.Type.STRING, null)); - Assert.assertEquals(new HCatSchema(expectedHCatSchema), - HCatUtil.getTableSchemaWithPtnCols(table)); - } - - /** - * Hive represents tables in two ways: - *

    - *
  • org.apache.hadoop.hive.metastore.api.Table - exactly whats stored in the metastore
  • - *
  • org.apache.hadoop.hive.ql.metadata.Table - adds business logic over api.Table
  • - *
- * Here we check SerDe-reported fields are included in the table schema. - */ - @Test - public void testGetTableSchemaWithPtnColsSerDeReportedFields() throws IOException { - Map parameters = Maps.newHashMap(); - parameters.put(serdeConstants.SERIALIZATION_CLASS, - "org.apache.hadoop.hive.serde2.thrift.test.IntString"); - parameters.put(serdeConstants.SERIALIZATION_FORMAT, "org.apache.thrift.protocol.TBinaryProtocol"); - - SerDeInfo serDeInfo = new SerDeInfo(null, - "org.apache.hadoop.hive.serde2.thrift.ThriftDeserializer", parameters); - - // StorageDescriptor has an empty list of fields - SerDe will report them. - StorageDescriptor sd = new StorageDescriptor(new ArrayList(), "location", - "org.apache.hadoop.mapred.TextInputFormat", "org.apache.hadoop.mapred.TextOutputFormat", - false, -1, serDeInfo, new ArrayList(), new ArrayList(), - new HashMap()); - - org.apache.hadoop.hive.metastore.api.Table apiTable = - new org.apache.hadoop.hive.metastore.api.Table("test_tblname", "test_dbname", "test_owner", - 0, 0, 0, sd, new ArrayList(), new HashMap(), - "viewOriginalText", "viewExpandedText", TableType.EXTERNAL_TABLE.name()); - Table table = new Table(apiTable); - - List expectedHCatSchema = Lists.newArrayList( - new HCatFieldSchema("myint", HCatFieldSchema.Type.INT, null), - new HCatFieldSchema("mystring", HCatFieldSchema.Type.STRING, null), - new HCatFieldSchema("underscore_int", HCatFieldSchema.Type.INT, null)); - - Assert.assertEquals(new HCatSchema(expectedHCatSchema), - HCatUtil.getTableSchemaWithPtnCols(table)); - } -} diff --git hcatalog/core/src/test/java/org/apache/hcatalog/common/TestHiveClientCache.java hcatalog/core/src/test/java/org/apache/hcatalog/common/TestHiveClientCache.java deleted file mode 100644 index 9d9a0fc..0000000 --- hcatalog/core/src/test/java/org/apache/hcatalog/common/TestHiveClientCache.java +++ /dev/null @@ -1,267 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.common; - -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.HiveMetaStore; -import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; -import org.apache.hadoop.hive.metastore.api.AlreadyExistsException; -import org.apache.hadoop.hive.metastore.api.Database; -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.metastore.api.InvalidObjectException; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; -import org.apache.hadoop.hive.metastore.api.SerDeInfo; -import org.apache.hadoop.hive.metastore.api.StorageDescriptor; -import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.hadoop.hive.serde.serdeConstants; -import org.apache.hcatalog.NoExitSecurityManager; -import org.apache.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzer; -import org.apache.thrift.TException; -import org.junit.AfterClass; -import org.junit.BeforeClass; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertNotSame; -import static org.junit.Assert.assertTrue; -import static 
org.junit.Assert.fail; - -import org.junit.Ignore; -import org.junit.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import javax.security.auth.login.LoginException; -import java.io.IOException; -import java.math.BigInteger; -import java.util.ArrayList; -import java.util.List; -import java.util.Random; -import java.util.concurrent.Callable; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; - -public class TestHiveClientCache { - - private static final Logger LOG = LoggerFactory.getLogger(TestHiveClientCache.class); - final HiveConf hiveConf = new HiveConf(); - - @BeforeClass - public static void setUp() throws Exception { - } - - @AfterClass - public static void tearDown() throws Exception { - } - - @Test - public void testCacheHit() throws IOException, MetaException, LoginException { - - HiveClientCache cache = new HiveClientCache(1000); - HiveMetaStoreClient client = cache.get(hiveConf); - assertNotNull(client); - client.close(); // close shouldn't matter - - // Setting a non important configuration should return the same client only - hiveConf.setIntVar(HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTS, 10); - HiveMetaStoreClient client2 = cache.get(hiveConf); - assertNotNull(client2); - assertEquals(client, client2); - client2.close(); - } - - @Test - public void testCacheMiss() throws IOException, MetaException, LoginException { - HiveClientCache cache = new HiveClientCache(1000); - HiveMetaStoreClient client = cache.get(hiveConf); - assertNotNull(client); - - // Set different uri as it is one of the criteria deciding whether to return the same client or not - hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, " "); // URIs are checked for string equivalence, even spaces make them different - HiveMetaStoreClient client2 = cache.get(hiveConf); - assertNotNull(client2); - assertNotSame(client, client2); - } - - /** - * Check that a 
new client is returned for the same configuration after the expiry time. - * Also verify that the expiry time configuration is honoured - */ - @Test - public void testCacheExpiry() throws IOException, MetaException, LoginException, InterruptedException { - HiveClientCache cache = new HiveClientCache(1); - HiveClientCache.CacheableHiveMetaStoreClient client = (HiveClientCache.CacheableHiveMetaStoreClient) cache.get(hiveConf); - assertNotNull(client); - - Thread.sleep(2500); - HiveMetaStoreClient client2 = cache.get(hiveConf); - client.close(); - assertTrue(client.isClosed()); // close() after *expiry time* and *a cache access* should have tore down the client - - assertNotNull(client2); - assertNotSame(client, client2); - } - - /** - * Check that a *new* client is created if asked from different threads even with - * the same hive configuration - * @throws ExecutionException - * @throws InterruptedException - */ - @Test - public void testMultipleThreadAccess() throws ExecutionException, InterruptedException { - final HiveClientCache cache = new HiveClientCache(1000); - - class GetHiveClient implements Callable { - @Override - public HiveMetaStoreClient call() throws IOException, MetaException, LoginException { - return cache.get(hiveConf); - } - } - - ExecutorService executor = Executors.newFixedThreadPool(2); - - Callable worker1 = new GetHiveClient(); - Callable worker2 = new GetHiveClient(); - Future clientFuture1 = executor.submit(worker1); - Future clientFuture2 = executor.submit(worker2); - HiveMetaStoreClient client1 = clientFuture1.get(); - HiveMetaStoreClient client2 = clientFuture2.get(); - assertNotNull(client1); - assertNotNull(client2); - assertNotSame(client1, client2); - } - - @Test - public void testCloseAllClients() throws IOException, MetaException, LoginException { - final HiveClientCache cache = new HiveClientCache(1000); - HiveClientCache.CacheableHiveMetaStoreClient client1 = (HiveClientCache.CacheableHiveMetaStoreClient) cache.get(hiveConf); - 
hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, " "); // URIs are checked for string equivalence, even spaces make them different - HiveClientCache.CacheableHiveMetaStoreClient client2 = (HiveClientCache.CacheableHiveMetaStoreClient) cache.get(hiveConf); - cache.closeAllClientsQuietly(); - assertTrue(client1.isClosed()); - assertTrue(client2.isClosed()); - } - - /** - * Test that a long table name actually breaks the HMSC. Subsequently check that isOpen() reflects - * and tells if the client is broken - */ - @Ignore("hangs indefinitely") - @Test - public void testHMSCBreakability() throws IOException, MetaException, LoginException, TException, AlreadyExistsException, - InvalidObjectException, NoSuchObjectException, InterruptedException { - // Setup - LocalMetaServer metaServer = new LocalMetaServer(); - metaServer.start(); - - final HiveClientCache cache = new HiveClientCache(1000); - HiveClientCache.CacheableHiveMetaStoreClient client = - (HiveClientCache.CacheableHiveMetaStoreClient) cache.get(metaServer.getHiveConf()); - - assertTrue(client.isOpen()); - - final String DB_NAME = "test_db"; - final String LONG_TABLE_NAME = "long_table_name_" + new BigInteger(200, new Random()).toString(2); - - try { - client.dropTable(DB_NAME, LONG_TABLE_NAME); - } catch (Exception e) { - } - try { - client.dropDatabase(DB_NAME); - } catch (Exception e) { - } - - client.createDatabase(new Database(DB_NAME, "", null, null)); - - List fields = new ArrayList(); - fields.add(new FieldSchema("colname", serdeConstants.STRING_TYPE_NAME, "")); - Table tbl = new Table(); - tbl.setDbName(DB_NAME); - tbl.setTableName(LONG_TABLE_NAME); - StorageDescriptor sd = new StorageDescriptor(); - sd.setCols(fields); - tbl.setSd(sd); - sd.setSerdeInfo(new SerDeInfo()); - - // Break the client - try { - client.createTable(tbl); - fail("Exception was expected while creating table with long name"); - } catch (Exception e) { - } - - assertFalse(client.isOpen()); - metaServer.shutDown(); - } - - private 
static class LocalMetaServer implements Runnable { - public final int MS_PORT = 20101; - private final HiveConf hiveConf; - private final SecurityManager securityManager; - public final static int WAIT_TIME_FOR_BOOTUP = 30000; - - public LocalMetaServer() { - securityManager = System.getSecurityManager(); - System.setSecurityManager(new NoExitSecurityManager()); - hiveConf = new HiveConf(TestHiveClientCache.class); - hiveConf.set("hive.metastore.local", "false"); - hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, "thrift://localhost:" - + MS_PORT); - hiveConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES, 3); - hiveConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTFAILURERETRIES, 3); - hiveConf.set(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname, - HCatSemanticAnalyzer.class.getName()); - hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); - hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); - hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, - "false"); - System.setProperty(HiveConf.ConfVars.PREEXECHOOKS.varname, " "); - System.setProperty(HiveConf.ConfVars.POSTEXECHOOKS.varname, " "); - } - - public void start() throws InterruptedException { - Thread thread = new Thread(this); - thread.start(); - Thread.sleep(WAIT_TIME_FOR_BOOTUP); // Wait for the server to bootup - } - - @Override - public void run() { - try { - HiveMetaStore.main(new String[]{"-v", "-p", String.valueOf(MS_PORT)}); - } catch (Throwable t) { - LOG.error("Exiting. 
Got exception from metastore: ", t); - } - } - - public HiveConf getHiveConf() { - return hiveConf; - } - - public void shutDown() { - System.setSecurityManager(securityManager); - } - } -} diff --git hcatalog/core/src/test/java/org/apache/hcatalog/data/HCatDataCheckUtil.java hcatalog/core/src/test/java/org/apache/hcatalog/data/HCatDataCheckUtil.java deleted file mode 100644 index 3e829f7..0000000 --- hcatalog/core/src/test/java/org/apache/hcatalog/data/HCatDataCheckUtil.java +++ /dev/null @@ -1,114 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.hcatalog.data; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Map.Entry; - -import org.apache.hadoop.hive.cli.CliSessionState; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.CommandNeedRetryException; -import org.apache.hadoop.hive.ql.Driver; -import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hcatalog.MiniCluster; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Helper class for Other Data Testers - */ -public class HCatDataCheckUtil { - - private static final Logger LOG = LoggerFactory.getLogger(HCatDataCheckUtil.class); - - public static Driver instantiateDriver(MiniCluster cluster) { - HiveConf hiveConf = new HiveConf(HCatDataCheckUtil.class); - for (Entry e : cluster.getProperties().entrySet()) { - hiveConf.set(e.getKey().toString(), e.getValue().toString()); - } - hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); - hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); - hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); - - LOG.debug("Hive conf : {}", hiveConf.getAllProperties()); - Driver driver = new Driver(hiveConf); - SessionState.start(new CliSessionState(hiveConf)); - return driver; - } - - public static void generateDataFile(MiniCluster cluster, String fileName) throws IOException { - MiniCluster.deleteFile(cluster, fileName); - String[] input = new String[50]; - for (int i = 0; i < 50; i++) { - input[i] = (i % 5) + "\t" + i + "\t" + "_S" + i + "S_"; - } - MiniCluster.createInputFile(cluster, fileName, input); - } - - public static void createTable(Driver driver, String tableName, String createTableArgs) - throws CommandNeedRetryException, IOException { - String createTable = "create table " + tableName + createTableArgs; - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table. 
[" + createTable + "], return code from hive driver : [" + retCode + "]"); - } - } - - public static void dropTable(Driver driver, String tablename) throws IOException, CommandNeedRetryException { - driver.run("drop table if exists " + tablename); - } - - public static ArrayList formattedRun(Driver driver, String name, String selectCmd) - throws CommandNeedRetryException, IOException { - driver.run(selectCmd); - ArrayList src_values = new ArrayList(); - driver.getResults(src_values); - LOG.info("{} : {}", name, src_values); - return src_values; - } - - - public static boolean recordsEqual(HCatRecord first, HCatRecord second) { - return (compareRecords(first, second) == 0); - } - - public static int compareRecords(HCatRecord first, HCatRecord second) { - return compareRecordContents(first.getAll(), second.getAll()); - } - - public static int compareRecordContents(List first, List second) { - int mySz = first.size(); - int urSz = second.size(); - if (mySz != urSz) { - return mySz - urSz; - } else { - for (int i = 0; i < first.size(); i++) { - int c = DataType.compare(first.get(i), second.get(i)); - if (c != 0) { - return c; - } - } - return 0; - } - } - - -} diff --git hcatalog/core/src/test/java/org/apache/hcatalog/data/TestDefaultHCatRecord.java hcatalog/core/src/test/java/org/apache/hcatalog/data/TestDefaultHCatRecord.java deleted file mode 100644 index 3a19da6..0000000 --- hcatalog/core/src/test/java/org/apache/hcatalog/data/TestDefaultHCatRecord.java +++ /dev/null @@ -1,260 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.data; - -import java.io.DataInput; -import java.io.DataInputStream; -import java.io.DataOutput; -import java.io.DataOutputStream; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import org.apache.hcatalog.common.HCatException; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.apache.hcatalog.data.schema.HCatSchemaUtils; - -import junit.framework.Assert; -import junit.framework.TestCase; - -public class TestDefaultHCatRecord extends TestCase { - - public void testRYW() throws IOException { - - File f = new File("binary.dat"); - f.delete(); - f.createNewFile(); - f.deleteOnExit(); - - OutputStream fileOutStream = new FileOutputStream(f); - DataOutput outStream = new DataOutputStream(fileOutStream); - - HCatRecord[] recs = getHCatRecords(); - for (int i = 0; i < recs.length; i++) { - recs[i].write(outStream); - } - fileOutStream.flush(); - fileOutStream.close(); - - InputStream fInStream = new FileInputStream(f); - DataInput inpStream = new DataInputStream(fInStream); - - for (int i = 0; i < recs.length; i++) { - HCatRecord rec = new DefaultHCatRecord(); - rec.readFields(inpStream); - Assert.assertTrue(HCatDataCheckUtil.recordsEqual(recs[i], rec)); - } - - Assert.assertEquals(fInStream.available(), 0); - fInStream.close(); - - } - - public void testCompareTo() { - 
HCatRecord[] recs = getHCatRecords(); - Assert.assertTrue(HCatDataCheckUtil.compareRecords(recs[0], recs[1]) == 0); - Assert.assertTrue(HCatDataCheckUtil.compareRecords(recs[4], recs[5]) == 0); - } - - public void testEqualsObject() { - - HCatRecord[] recs = getHCatRecords(); - Assert.assertTrue(HCatDataCheckUtil.recordsEqual(recs[0], recs[1])); - Assert.assertTrue(HCatDataCheckUtil.recordsEqual(recs[4], recs[5])); - } - - /** - * Test get and set calls with type - * @throws HCatException - */ - public void testGetSetByType1() throws HCatException { - HCatRecord inpRec = getHCatRecords()[0]; - HCatRecord newRec = new DefaultHCatRecord(inpRec.size()); - HCatSchema hsch = - HCatSchemaUtils.getHCatSchema( - "a:tinyint,b:smallint,c:int,d:bigint,e:float,f:double,g:boolean,h:string,i:binary,j:string"); - - - newRec.setByte("a", hsch, inpRec.getByte("a", hsch)); - newRec.setShort("b", hsch, inpRec.getShort("b", hsch)); - newRec.setInteger("c", hsch, inpRec.getInteger("c", hsch)); - newRec.setLong("d", hsch, inpRec.getLong("d", hsch)); - newRec.setFloat("e", hsch, inpRec.getFloat("e", hsch)); - newRec.setDouble("f", hsch, inpRec.getDouble("f", hsch)); - newRec.setBoolean("g", hsch, inpRec.getBoolean("g", hsch)); - newRec.setString("h", hsch, inpRec.getString("h", hsch)); - newRec.setByteArray("i", hsch, inpRec.getByteArray("i", hsch)); - newRec.setString("j", hsch, inpRec.getString("j", hsch)); - - Assert.assertTrue(HCatDataCheckUtil.recordsEqual(newRec, inpRec)); - - - } - - /** - * Test get and set calls with type - * @throws HCatException - */ - public void testGetSetByType2() throws HCatException { - HCatRecord inpRec = getGetSet2InpRec(); - - HCatRecord newRec = new DefaultHCatRecord(inpRec.size()); - HCatSchema hsch = - HCatSchemaUtils.getHCatSchema("a:binary,b:map,c:array,d:struct"); - - - newRec.setByteArray("a", hsch, inpRec.getByteArray("a", hsch)); - newRec.setMap("b", hsch, inpRec.getMap("b", hsch)); - newRec.setList("c", hsch, inpRec.getList("c", hsch)); - 
newRec.setStruct("d", hsch, inpRec.getStruct("d", hsch)); - - Assert.assertTrue(HCatDataCheckUtil.recordsEqual(newRec, inpRec)); - } - - - private HCatRecord getGetSet2InpRec() { - List rlist = new ArrayList(); - - rlist.add(new byte[]{1, 2, 3}); - - Map mapcol = new HashMap(3); - mapcol.put(new Short("2"), "hcat is cool"); - mapcol.put(new Short("3"), "is it?"); - mapcol.put(new Short("4"), "or is it not?"); - rlist.add(mapcol); - - List listcol = new ArrayList(); - listcol.add(314); - listcol.add(007); - rlist.add(listcol);//list - rlist.add(listcol);//struct - return new DefaultHCatRecord(rlist); - } - - private HCatRecord[] getHCatRecords() { - - List rec_1 = new ArrayList(8); - rec_1.add(new Byte("123")); - rec_1.add(new Short("456")); - rec_1.add(new Integer(789)); - rec_1.add(new Long(1000L)); - rec_1.add(new Float(5.3F)); - rec_1.add(new Double(5.3D)); - rec_1.add(new Boolean(true)); - rec_1.add(new String("hcat and hadoop")); - rec_1.add(null); - rec_1.add("null"); - - HCatRecord tup_1 = new DefaultHCatRecord(rec_1); - - List rec_2 = new ArrayList(8); - rec_2.add(new Byte("123")); - rec_2.add(new Short("456")); - rec_2.add(new Integer(789)); - rec_2.add(new Long(1000L)); - rec_2.add(new Float(5.3F)); - rec_2.add(new Double(5.3D)); - rec_2.add(new Boolean(true)); - rec_2.add(new String("hcat and hadoop")); - rec_2.add(null); - rec_2.add("null"); - HCatRecord tup_2 = new DefaultHCatRecord(rec_2); - - List rec_3 = new ArrayList(10); - rec_3.add(new Byte("123")); - rec_3.add(new Short("456")); - rec_3.add(new Integer(789)); - rec_3.add(new Long(1000L)); - rec_3.add(new Double(5.3D)); - rec_3.add(new String("hcat and hadoop")); - rec_3.add(null); - List innerList = new ArrayList(); - innerList.add(314); - innerList.add(007); - rec_3.add(innerList); - Map map = new HashMap(3); - map.put(new Short("2"), "hcat is cool"); - map.put(new Short("3"), "is it?"); - map.put(new Short("4"), "or is it not?"); - rec_3.add(map); - - HCatRecord tup_3 = new 
DefaultHCatRecord(rec_3); - - List rec_4 = new ArrayList(8); - rec_4.add(new Byte("123")); - rec_4.add(new Short("456")); - rec_4.add(new Integer(789)); - rec_4.add(new Long(1000L)); - rec_4.add(new Double(5.3D)); - rec_4.add(new String("hcat and hadoop")); - rec_4.add(null); - rec_4.add("null"); - - Map map2 = new HashMap(3); - map2.put(new Short("2"), "hcat is cool"); - map2.put(new Short("3"), "is it?"); - map2.put(new Short("4"), "or is it not?"); - rec_4.add(map2); - List innerList2 = new ArrayList(); - innerList2.add(314); - innerList2.add(007); - rec_4.add(innerList2); - HCatRecord tup_4 = new DefaultHCatRecord(rec_4); - - - List rec_5 = new ArrayList(3); - rec_5.add(getByteArray()); - rec_5.add(getStruct()); - rec_5.add(getList()); - HCatRecord tup_5 = new DefaultHCatRecord(rec_5); - - - List rec_6 = new ArrayList(3); - rec_6.add(getByteArray()); - rec_6.add(getStruct()); - rec_6.add(getList()); - HCatRecord tup_6 = new DefaultHCatRecord(rec_6); - - - return new HCatRecord[]{tup_1, tup_2, tup_3, tup_4, tup_5, tup_6}; - - } - - private Object getList() { - return getStruct(); - } - - private Object getByteArray() { - return new byte[]{1, 2, 3, 4}; - } - - private List getStruct() { - List struct = new ArrayList(); - struct.add(new Integer(1)); - struct.add(new String("x")); - return struct; - } -} diff --git hcatalog/core/src/test/java/org/apache/hcatalog/data/TestHCatRecordSerDe.java hcatalog/core/src/test/java/org/apache/hcatalog/data/TestHCatRecordSerDe.java deleted file mode 100644 index ee205b3..0000000 --- hcatalog/core/src/test/java/org/apache/hcatalog/data/TestHCatRecordSerDe.java +++ /dev/null @@ -1,169 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.data; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Properties; - -import junit.framework.Assert; -import junit.framework.TestCase; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.serde.serdeConstants; -import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; -import org.apache.hadoop.io.Writable; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TestHCatRecordSerDe extends TestCase { - - private static final Logger LOG = LoggerFactory.getLogger(TestHCatRecordSerDe.class); - - public Map getData() { - Map data = new HashMap(); - - List rlist = new ArrayList(11); - rlist.add(new Byte("123")); - rlist.add(new Short("456")); - rlist.add(new Integer(789)); - rlist.add(new Long(1000L)); - rlist.add(new Double(5.3D)); - rlist.add(new Float(2.39F)); - rlist.add(new String("hcat and hadoop")); - rlist.add(null); - - List innerStruct = new ArrayList(2); - innerStruct.add(new String("abc")); - innerStruct.add(new String("def")); - rlist.add(innerStruct); - - List innerList = new ArrayList(); - innerList.add(314); - innerList.add(007); - rlist.add(innerList); - - Map map = new HashMap(3); - map.put(new Short("2"), "hcat is cool"); - map.put(new Short("3"), "is it?"); - map.put(new Short("4"), "or is it 
not?"); - rlist.add(map); - - rlist.add(new Boolean(true)); - - List c1 = new ArrayList(); - List c1_1 = new ArrayList(); - c1_1.add(new Integer(12)); - List i2 = new ArrayList(); - List ii1 = new ArrayList(); - ii1.add(new Integer(13)); - ii1.add(new Integer(14)); - i2.add(ii1); - Map> ii2 = new HashMap>(); - List iii1 = new ArrayList(); - iii1.add(new Integer(15)); - ii2.put("phew", iii1); - i2.add(ii2); - c1_1.add(i2); - c1.add(c1_1); - rlist.add(c1); - List am = new ArrayList(); - Map am_1 = new HashMap(); - am_1.put("noo", "haha"); - am.add(am_1); - rlist.add(am); - List aa = new ArrayList(); - List aa_1 = new ArrayList(); - aa_1.add("bloo"); - aa_1.add("bwahaha"); - aa.add(aa_1); - rlist.add(aa); - - String typeString = - "tinyint,smallint,int,bigint,double,float,string,string," - + "struct,array,map,boolean," - + "array,ii2:map>>>>," - + "array>,array>"; - Properties props = new Properties(); - - props.put(serdeConstants.LIST_COLUMNS, "ti,si,i,bi,d,f,s,n,r,l,m,b,c1,am,aa"); - props.put(serdeConstants.LIST_COLUMN_TYPES, typeString); -// props.put(Constants.SERIALIZATION_NULL_FORMAT, "\\N"); -// props.put(Constants.SERIALIZATION_FORMAT, "1"); - - data.put(props, new DefaultHCatRecord(rlist)); - return data; - } - - public void testRW() throws Exception { - - Configuration conf = new Configuration(); - - for (Entry e : getData().entrySet()) { - Properties tblProps = e.getKey(); - HCatRecord r = e.getValue(); - - HCatRecordSerDe hrsd = new HCatRecordSerDe(); - hrsd.initialize(conf, tblProps); - - LOG.info("ORIG: {}", r); - - Writable s = hrsd.serialize(r, hrsd.getObjectInspector()); - LOG.info("ONE: {}", s); - - HCatRecord r2 = (HCatRecord) hrsd.deserialize(s); - Assert.assertTrue(HCatDataCheckUtil.recordsEqual(r, r2)); - - // If it went through correctly, then s is also a HCatRecord, - // and also equal to the above, and a deepcopy, and this holds - // through for multiple levels more of serialization as well. 
- - Writable s2 = hrsd.serialize(s, hrsd.getObjectInspector()); - LOG.info("TWO: {}", s2); - Assert.assertTrue(HCatDataCheckUtil.recordsEqual(r, (HCatRecord) s)); - Assert.assertTrue(HCatDataCheckUtil.recordsEqual(r, (HCatRecord) s2)); - - // serialize using another serde, and read out that object repr. - LazySimpleSerDe testSD = new LazySimpleSerDe(); - testSD.initialize(conf, tblProps); - - Writable s3 = testSD.serialize(s, hrsd.getObjectInspector()); - LOG.info("THREE: {}", s3); - Object o3 = testSD.deserialize(s3); - Assert.assertFalse(r.getClass().equals(o3.getClass())); - - // then serialize again using hrsd, and compare results - HCatRecord s4 = (HCatRecord) hrsd.serialize(o3, testSD.getObjectInspector()); - LOG.info("FOUR: {}", s4); - - // Test LazyHCatRecord init and read - LazyHCatRecord s5 = new LazyHCatRecord(o3, testSD.getObjectInspector()); - LOG.info("FIVE: {}", s5); - - LazyHCatRecord s6 = new LazyHCatRecord(s4, hrsd.getObjectInspector()); - LOG.info("SIX: {}", s6); - - } - - } - -} diff --git hcatalog/core/src/test/java/org/apache/hcatalog/data/TestJsonSerDe.java hcatalog/core/src/test/java/org/apache/hcatalog/data/TestJsonSerDe.java deleted file mode 100644 index 48dc766..0000000 --- hcatalog/core/src/test/java/org/apache/hcatalog/data/TestJsonSerDe.java +++ /dev/null @@ -1,214 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.data; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Properties; - -import junit.framework.TestCase; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.serde.serdeConstants; -import org.apache.hadoop.io.Writable; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TestJsonSerDe extends TestCase { - - private static final Logger LOG = LoggerFactory.getLogger(TestJsonSerDe.class); - - public List> getData() { - List> data = new ArrayList>(); - - List rlist = new ArrayList(13); - rlist.add(new Byte("123")); - rlist.add(new Short("456")); - rlist.add(new Integer(789)); - rlist.add(new Long(1000L)); - rlist.add(new Double(5.3D)); - rlist.add(new Float(2.39F)); - rlist.add(new String("hcat and hadoop")); - rlist.add(null); - - List innerStruct = new ArrayList(2); - innerStruct.add(new String("abc")); - innerStruct.add(new String("def")); - rlist.add(innerStruct); - - List innerList = new ArrayList(); - innerList.add(314); - innerList.add(007); - rlist.add(innerList); - - Map map = new HashMap(3); - map.put(new Short("2"), "hcat is cool"); - map.put(new Short("3"), "is it?"); - map.put(new Short("4"), "or is it not?"); - rlist.add(map); - - rlist.add(new Boolean(true)); - - List c1 = new ArrayList(); - List c1_1 = new ArrayList(); - c1_1.add(new Integer(12)); - List i2 = new ArrayList(); - List ii1 = new ArrayList(); - ii1.add(new Integer(13)); - 
ii1.add(new Integer(14)); - i2.add(ii1); - Map> ii2 = new HashMap>(); - List iii1 = new ArrayList(); - iii1.add(new Integer(15)); - ii2.put("phew", iii1); - i2.add(ii2); - c1_1.add(i2); - c1.add(c1_1); - rlist.add(c1); - - List nlist = new ArrayList(13); - nlist.add(null); // tinyint - nlist.add(null); // smallint - nlist.add(null); // int - nlist.add(null); // bigint - nlist.add(null); // double - nlist.add(null); // float - nlist.add(null); // string - nlist.add(null); // string - nlist.add(null); // struct - nlist.add(null); // array - nlist.add(null); // map - nlist.add(null); // bool - nlist.add(null); // complex - - String typeString = - "tinyint,smallint,int,bigint,double,float,string,string," - + "struct,array,map,boolean," - + "array,ii2:map>>>>"; - Properties props = new Properties(); - - props.put(serdeConstants.LIST_COLUMNS, "ti,si,i,bi,d,f,s,n,r,l,m,b,c1"); - props.put(serdeConstants.LIST_COLUMN_TYPES, typeString); -// props.put(Constants.SERIALIZATION_NULL_FORMAT, "\\N"); -// props.put(Constants.SERIALIZATION_FORMAT, "1"); - - data.add(new Pair(props, new DefaultHCatRecord(rlist))); - data.add(new Pair(props, new DefaultHCatRecord(nlist))); - return data; - } - - public void testRW() throws Exception { - - Configuration conf = new Configuration(); - - for (Pair e : getData()) { - Properties tblProps = e.first; - HCatRecord r = e.second; - - HCatRecordSerDe hrsd = new HCatRecordSerDe(); - hrsd.initialize(conf, tblProps); - - JsonSerDe jsde = new JsonSerDe(); - jsde.initialize(conf, tblProps); - - LOG.info("ORIG:{}", r); - - Writable s = hrsd.serialize(r, hrsd.getObjectInspector()); - LOG.info("ONE:{}", s); - - Object o1 = hrsd.deserialize(s); - assertTrue(HCatDataCheckUtil.recordsEqual(r, (HCatRecord) o1)); - - Writable s2 = jsde.serialize(o1, hrsd.getObjectInspector()); - LOG.info("TWO:{}", s2); - Object o2 = jsde.deserialize(s2); - LOG.info("deserialized TWO : {} ", o2); - - assertTrue(HCatDataCheckUtil.recordsEqual(r, (HCatRecord) o2)); - } - - } - 
- public void testRobustRead() throws Exception { - /** - * This test has been added to account for HCATALOG-436 - * We write out columns with "internal column names" such - * as "_col0", but try to read with retular column names. - */ - - Configuration conf = new Configuration(); - - for (Pair e : getData()) { - Properties tblProps = e.first; - HCatRecord r = e.second; - - Properties internalTblProps = new Properties(); - for (Map.Entry pe : tblProps.entrySet()) { - if (!pe.getKey().equals(serdeConstants.LIST_COLUMNS)) { - internalTblProps.put(pe.getKey(), pe.getValue()); - } else { - internalTblProps.put(pe.getKey(), getInternalNames((String) pe.getValue())); - } - } - - LOG.info("orig tbl props:{}", tblProps); - LOG.info("modif tbl props:{}", internalTblProps); - - JsonSerDe wjsd = new JsonSerDe(); - wjsd.initialize(conf, internalTblProps); - - JsonSerDe rjsd = new JsonSerDe(); - rjsd.initialize(conf, tblProps); - - LOG.info("ORIG:{}", r); - - Writable s = wjsd.serialize(r, wjsd.getObjectInspector()); - LOG.info("ONE:{}", s); - - Object o1 = wjsd.deserialize(s); - LOG.info("deserialized ONE : {} ", o1); - - Object o2 = rjsd.deserialize(s); - LOG.info("deserialized TWO : {} ", o2); - assertTrue(HCatDataCheckUtil.recordsEqual(r, (HCatRecord) o2)); - } - - } - - String getInternalNames(String columnNames) { - if (columnNames == null) { - return null; - } - if (columnNames.isEmpty()) { - return ""; - } - - StringBuffer sb = new StringBuffer(); - int numStrings = columnNames.split(",").length; - sb.append("_col0"); - for (int i = 1; i < numStrings; i++) { - sb.append(","); - sb.append(HiveConf.getColumnInternalName(i)); - } - return sb.toString(); - } -} diff --git hcatalog/core/src/test/java/org/apache/hcatalog/data/TestLazyHCatRecord.java hcatalog/core/src/test/java/org/apache/hcatalog/data/TestLazyHCatRecord.java deleted file mode 100644 index 12845b4..0000000 --- hcatalog/core/src/test/java/org/apache/hcatalog/data/TestLazyHCatRecord.java +++ /dev/null @@ -1,193 
+0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.data; - -import java.util.ArrayList; -import java.util.List; - -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.apache.hcatalog.data.schema.HCatSchemaUtils; -import org.junit.Assert; -import org.junit.Test; - -public class TestLazyHCatRecord { - - private final int INT_CONST = 789; - private final long LONG_CONST = 5000000000L; - private final double DOUBLE_CONST = 3.141592654; - private final String STRING_CONST = "hello world"; - - @Test - public void testGet() throws Exception { - HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector()); - Assert.assertEquals(INT_CONST, ((Integer) r.get(0)).intValue()); - Assert.assertEquals(LONG_CONST, ((Long) r.get(1)).longValue()); - Assert.assertEquals(DOUBLE_CONST, ((Double) r.get(2)).doubleValue(), 0); - Assert.assertEquals(STRING_CONST, (String) r.get(3)); - } - - @Test - public void 
testGetWithName() throws Exception { - TypeInfo ti = getTypeInfo(); - HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector(ti)); - HCatSchema schema = HCatSchemaUtils.getHCatSchema(ti) - .get(0).getStructSubSchema(); - Assert.assertEquals(INT_CONST, ((Integer) r.get("an_int", schema)).intValue()); - Assert.assertEquals(LONG_CONST, ((Long) r.get("a_long", schema)).longValue()); - Assert.assertEquals(DOUBLE_CONST, ((Double) r.get("a_double", schema)).doubleValue(), 0); - Assert.assertEquals(STRING_CONST, (String) r.get("a_string", schema)); - } - - @Test - public void testGetAll() throws Exception { - HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector()); - List list = r.getAll(); - Assert.assertEquals(INT_CONST, ((Integer) list.get(0)).intValue()); - Assert.assertEquals(LONG_CONST, ((Long) list.get(1)).longValue()); - Assert.assertEquals(DOUBLE_CONST, ((Double) list.get(2)).doubleValue(), 0); - Assert.assertEquals(STRING_CONST, (String) list.get(3)); - } - - @Test - public void testSet() throws Exception { - HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector()); - boolean sawException = false; - try { - r.set(3, "Mary had a little lamb"); - } catch (UnsupportedOperationException uoe) { - sawException = true; - } - Assert.assertTrue(sawException); - } - - @Test - public void testSize() throws Exception { - HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector()); - Assert.assertEquals(4, r.size()); - } - - @Test - public void testReadFields() throws Exception { - HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector()); - boolean sawException = false; - try { - r.readFields(null); - } catch (UnsupportedOperationException uoe) { - sawException = true; - } - Assert.assertTrue(sawException); - } - - @Test - public void testWrite() throws Exception { - HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector()); - boolean sawException = false; - try { - r.write(null); - } 
catch (UnsupportedOperationException uoe) { - sawException = true; - } - Assert.assertTrue(sawException); - } - - @Test - public void testSetWithName() throws Exception { - HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector()); - boolean sawException = false; - try { - r.set("fred", null, "bob"); - } catch (UnsupportedOperationException uoe) { - sawException = true; - } - Assert.assertTrue(sawException); - } - - @Test - public void testRemove() throws Exception { - HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector()); - boolean sawException = false; - try { - r.remove(0); - } catch (UnsupportedOperationException uoe) { - sawException = true; - } - Assert.assertTrue(sawException); - } - - @Test - public void testCopy() throws Exception { - HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector()); - boolean sawException = false; - try { - r.copy(null); - } catch (UnsupportedOperationException uoe) { - sawException = true; - } - Assert.assertTrue(sawException); - } - - @Test - public void testGetWritable() throws Exception { - HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector()).getWritable(); - Assert.assertEquals(INT_CONST, ((Integer) r.get(0)).intValue()); - Assert.assertEquals(LONG_CONST, ((Long) r.get(1)).longValue()); - Assert.assertEquals(DOUBLE_CONST, ((Double) r.get(2)).doubleValue(), 0); - Assert.assertEquals(STRING_CONST, (String) r.get(3)); - Assert.assertEquals("org.apache.hcatalog.data.DefaultHCatRecord", r.getClass().getName()); - } - - private HCatRecord getHCatRecord() throws Exception { - List rec_1 = new ArrayList(4); - rec_1.add( new Integer(INT_CONST)); - rec_1.add( new Long(LONG_CONST)); - rec_1.add( new Double(DOUBLE_CONST)); - rec_1.add( new String(STRING_CONST)); - - return new DefaultHCatRecord(rec_1); - } - - private TypeInfo getTypeInfo() throws Exception { - List names = new ArrayList(4); - names.add("an_int"); - names.add("a_long"); - names.add("a_double"); - 
names.add("a_string"); - - List tis = new ArrayList(4); - tis.add(TypeInfoFactory.getPrimitiveTypeInfo("int")); - tis.add(TypeInfoFactory.getPrimitiveTypeInfo("bigint")); - tis.add(TypeInfoFactory.getPrimitiveTypeInfo("double")); - tis.add(TypeInfoFactory.getPrimitiveTypeInfo("string")); - - return TypeInfoFactory.getStructTypeInfo(names, tis); - } - - private ObjectInspector getObjectInspector(TypeInfo ti) throws Exception { - return HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector((StructTypeInfo)ti); - } - - private ObjectInspector getObjectInspector() throws Exception { - return HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector( - (StructTypeInfo)getTypeInfo()); - } -} diff --git hcatalog/core/src/test/java/org/apache/hcatalog/data/TestReaderWriter.java hcatalog/core/src/test/java/org/apache/hcatalog/data/TestReaderWriter.java deleted file mode 100644 index 006b760..0000000 --- hcatalog/core/src/test/java/org/apache/hcatalog/data/TestReaderWriter.java +++ /dev/null @@ -1,183 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.data; - -import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.ObjectInputStream; -import java.io.ObjectOutputStream; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.ql.CommandNeedRetryException; -import org.apache.hadoop.mapreduce.InputSplit; -import org.apache.hcatalog.common.HCatException; -import org.apache.hcatalog.data.transfer.DataTransferFactory; -import org.apache.hcatalog.data.transfer.HCatReader; -import org.apache.hcatalog.data.transfer.HCatWriter; -import org.apache.hcatalog.data.transfer.ReadEntity; -import org.apache.hcatalog.data.transfer.ReaderContext; -import org.apache.hcatalog.data.transfer.WriteEntity; -import org.apache.hcatalog.data.transfer.WriterContext; -import org.apache.hcatalog.mapreduce.HCatBaseTest; -import org.junit.Assert; -import org.junit.Test; - -public class TestReaderWriter extends HCatBaseTest { - - @Test - public void test() throws MetaException, CommandNeedRetryException, - IOException, ClassNotFoundException { - - driver.run("drop table mytbl"); - driver.run("create table mytbl (a string, b int)"); - Iterator> itr = hiveConf.iterator(); - Map map = new HashMap(); - while (itr.hasNext()) { - Entry kv = itr.next(); - map.put(kv.getKey(), kv.getValue()); - } - - WriterContext cntxt = runsInMaster(map); - - File writeCntxtFile = File.createTempFile("hcat-write", "temp"); - writeCntxtFile.deleteOnExit(); - - // Serialize context. - ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(writeCntxtFile)); - oos.writeObject(cntxt); - oos.flush(); - oos.close(); - - // Now, deserialize it. 
- ObjectInputStream ois = new ObjectInputStream(new FileInputStream(writeCntxtFile)); - cntxt = (WriterContext) ois.readObject(); - ois.close(); - - runsInSlave(cntxt); - commit(map, true, cntxt); - - ReaderContext readCntxt = runsInMaster(map, false); - - File readCntxtFile = File.createTempFile("hcat-read", "temp"); - readCntxtFile.deleteOnExit(); - oos = new ObjectOutputStream(new FileOutputStream(readCntxtFile)); - oos.writeObject(readCntxt); - oos.flush(); - oos.close(); - - ois = new ObjectInputStream(new FileInputStream(readCntxtFile)); - readCntxt = (ReaderContext) ois.readObject(); - ois.close(); - - for (InputSplit split : readCntxt.getSplits()) { - runsInSlave(split, readCntxt.getConf()); - } - } - - private WriterContext runsInMaster(Map config) throws HCatException { - - WriteEntity.Builder builder = new WriteEntity.Builder(); - WriteEntity entity = builder.withTable("mytbl").build(); - HCatWriter writer = DataTransferFactory.getHCatWriter(entity, config); - WriterContext info = writer.prepareWrite(); - return info; - } - - private ReaderContext runsInMaster(Map config, boolean bogus) - throws HCatException { - ReadEntity entity = new ReadEntity.Builder().withTable("mytbl").build(); - HCatReader reader = DataTransferFactory.getHCatReader(entity, config); - ReaderContext cntxt = reader.prepareRead(); - return cntxt; - } - - private void runsInSlave(InputSplit split, Configuration config) throws HCatException { - - HCatReader reader = DataTransferFactory.getHCatReader(split, config); - Iterator itr = reader.read(); - int i = 1; - while (itr.hasNext()) { - HCatRecord read = itr.next(); - HCatRecord written = getRecord(i++); - // Argh, HCatRecord doesnt implement equals() - Assert.assertTrue("Read: " + read.get(0) + "Written: " + written.get(0), - written.get(0).equals(read.get(0))); - Assert.assertTrue("Read: " + read.get(1) + "Written: " + written.get(1), - written.get(1).equals(read.get(1))); - Assert.assertEquals(2, read.size()); - } - 
//Assert.assertFalse(itr.hasNext()); - } - - private void runsInSlave(WriterContext context) throws HCatException { - - HCatWriter writer = DataTransferFactory.getHCatWriter(context); - writer.write(new HCatRecordItr()); - } - - private void commit(Map config, boolean status, - WriterContext context) throws IOException { - - WriteEntity.Builder builder = new WriteEntity.Builder(); - WriteEntity entity = builder.withTable("mytbl").build(); - HCatWriter writer = DataTransferFactory.getHCatWriter(entity, config); - if (status) { - writer.commit(context); - } else { - writer.abort(context); - } - } - - private static HCatRecord getRecord(int i) { - List list = new ArrayList(2); - list.add("Row #: " + i); - list.add(i); - return new DefaultHCatRecord(list); - } - - private static class HCatRecordItr implements Iterator { - - int i = 0; - - @Override - public boolean hasNext() { - return i++ < 100 ? true : false; - } - - @Override - public HCatRecord next() { - return getRecord(i); - } - - @Override - public void remove() { - throw new RuntimeException(); - } - } -} diff --git hcatalog/core/src/test/java/org/apache/hcatalog/data/schema/TestHCatSchema.java hcatalog/core/src/test/java/org/apache/hcatalog/data/schema/TestHCatSchema.java deleted file mode 100644 index 5f53842..0000000 --- hcatalog/core/src/test/java/org/apache/hcatalog/data/schema/TestHCatSchema.java +++ /dev/null @@ -1,103 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.data.schema; - -import junit.framework.TestCase; -import org.apache.hcatalog.common.HCatException; - -import java.util.ArrayList; -import java.util.List; - -public class TestHCatSchema extends TestCase { - public void testCannotAddFieldMoreThanOnce() throws HCatException { - List fieldSchemaList = new ArrayList(); - fieldSchemaList.add(new HCatFieldSchema("name", HCatFieldSchema.Type.STRING, "What's your handle?")); - fieldSchemaList.add(new HCatFieldSchema("age", HCatFieldSchema.Type.INT, "So very old")); - - HCatSchema schema = new HCatSchema(fieldSchemaList); - - assertTrue(schema.getFieldNames().contains("age")); - assertEquals(2, schema.getFields().size()); - - try { - schema.append(new HCatFieldSchema("age", HCatFieldSchema.Type.INT, "So very old")); - fail("Was able to append field schema with same name"); - } catch (HCatException he) { - assertTrue(he.getMessage().contains("Attempt to append HCatFieldSchema with already existing name: age.")); - } - - assertTrue(schema.getFieldNames().contains("age")); - assertEquals(2, schema.getFields().size()); - - // Should also not be able to add fields of different types with same name - try { - schema.append(new HCatFieldSchema("age", HCatFieldSchema.Type.STRING, "Maybe spelled out?")); - fail("Was able to append field schema with same name"); - } catch (HCatException he) { - assertTrue(he.getMessage().contains("Attempt to append HCatFieldSchema with already existing name: age.")); - } - - assertTrue(schema.getFieldNames().contains("age")); - assertEquals(2, 
schema.getFields().size()); - } - - public void testHashCodeEquals() throws HCatException { - HCatFieldSchema memberID1 = new HCatFieldSchema("memberID", HCatFieldSchema.Type.INT, "as a number"); - HCatFieldSchema memberID2 = new HCatFieldSchema("memberID", HCatFieldSchema.Type.INT, "as a number"); - assertTrue("Expected objects to be equal", memberID1.equals(memberID2)); - assertTrue("Expected hash codes to be equal", memberID1.hashCode() == memberID2.hashCode()); - } - - public void testCannotInstantiateSchemaWithRepeatedFieldNames() throws HCatException { - List fieldSchemaList = new ArrayList(); - - fieldSchemaList.add(new HCatFieldSchema("memberID", HCatFieldSchema.Type.INT, "as a number")); - fieldSchemaList.add(new HCatFieldSchema("location", HCatFieldSchema.Type.STRING, "there's Waldo")); - - // No duplicate names. This should be ok - HCatSchema schema = new HCatSchema(fieldSchemaList); - - fieldSchemaList.add(new HCatFieldSchema("memberID", HCatFieldSchema.Type.STRING, "as a String")); - - // Now a duplicated field name. 
Should fail - try { - HCatSchema schema2 = new HCatSchema(fieldSchemaList); - fail("Able to add duplicate field name"); - } catch (IllegalArgumentException iae) { - assertTrue(iae.getMessage().contains("Field named memberID already exists")); - } - } - public void testRemoveAddField() throws HCatException { - List fieldSchemaList = new ArrayList(); - - fieldSchemaList.add(new HCatFieldSchema("memberID", HCatFieldSchema.Type.INT, "as a number")); - HCatFieldSchema locationField = new HCatFieldSchema("location", HCatFieldSchema.Type.STRING, "there's Waldo"); - fieldSchemaList.add(locationField); - HCatSchema schema = new HCatSchema(fieldSchemaList); - schema.remove(locationField); - Integer position = schema.getPosition(locationField.getName()); - assertTrue("position is not null after remove" , position == null); - try { - schema.append(locationField); - } - catch (HCatException ex) { - assertFalse(ex.getMessage(), true); - } - } -} diff --git hcatalog/core/src/test/java/org/apache/hcatalog/data/schema/TestHCatSchemaUtils.java hcatalog/core/src/test/java/org/apache/hcatalog/data/schema/TestHCatSchemaUtils.java deleted file mode 100644 index 4bc0f85..0000000 --- hcatalog/core/src/test/java/org/apache/hcatalog/data/schema/TestHCatSchemaUtils.java +++ /dev/null @@ -1,82 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.data.schema; - -import java.io.PrintStream; - -import junit.framework.TestCase; - -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; -import org.apache.hcatalog.common.HCatException; -import org.apache.hcatalog.data.schema.HCatFieldSchema.Category; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TestHCatSchemaUtils extends TestCase { - - private static final Logger LOG = LoggerFactory.getLogger(TestHCatSchemaUtils.class); - - public void testSimpleOperation() throws Exception { - String typeString = "struct," - + "currently_registered_courses:array," - + "current_grades:map," - + "phnos:array>,blah:array>"; - - TypeInfo ti = TypeInfoUtils.getTypeInfoFromTypeString(typeString); - - HCatSchema hsch = HCatSchemaUtils.getHCatSchemaFromTypeString(typeString); - LOG.info("Type name : {}", ti.getTypeName()); - LOG.info("HCatSchema : {}", hsch); - assertEquals(hsch.size(), 1); - assertEquals(ti.getTypeName(), hsch.get(0).getTypeString()); - assertEquals(hsch.get(0).getTypeString(), typeString); - } - - @SuppressWarnings("unused") - private void pretty_print(PrintStream pout, HCatSchema hsch) throws HCatException { - pretty_print(pout, hsch, ""); - } - - - private void pretty_print(PrintStream pout, HCatSchema hsch, String prefix) throws HCatException { - int i = 0; - for (HCatFieldSchema field : hsch.getFields()) { - pretty_print(pout, field, prefix + "." + (field.getName() == null ? 
i : field.getName())); - i++; - } - } - - private void pretty_print(PrintStream pout, HCatFieldSchema hfsch, String prefix) throws HCatException { - - Category tcat = hfsch.getCategory(); - if (Category.STRUCT == tcat) { - pretty_print(pout, hfsch.getStructSubSchema(), prefix); - } else if (Category.ARRAY == tcat) { - pretty_print(pout, hfsch.getArrayElementSchema(), prefix); - } else if (Category.MAP == tcat) { - pout.println(prefix + ".mapkey:\t" + hfsch.getMapKeyType().toString()); - pretty_print(pout, hfsch.getMapValueSchema(), prefix + ".mapvalue:"); - } else { - pout.println(prefix + "\t" + hfsch.getType().toString()); - } - } - -} diff --git hcatalog/core/src/test/java/org/apache/hcatalog/fileformats/TestOrcDynamicPartitioned.java hcatalog/core/src/test/java/org/apache/hcatalog/fileformats/TestOrcDynamicPartitioned.java deleted file mode 100644 index f572bfe..0000000 --- hcatalog/core/src/test/java/org/apache/hcatalog/fileformats/TestOrcDynamicPartitioned.java +++ /dev/null @@ -1,52 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.fileformats; - -import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; -import org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat; -import org.apache.hadoop.hive.ql.io.orc.OrcSerde; -import org.apache.hcatalog.mapreduce.TestHCatDynamicPartitioned; -import org.junit.BeforeClass; - -public class TestOrcDynamicPartitioned extends TestHCatDynamicPartitioned { - - @BeforeClass - public static void generateInputData() throws Exception { - tableName = "testOrcDynamicPartitionedTable"; - generateWriteRecords(NUM_RECORDS, NUM_PARTITIONS, 0); - generateDataColumns(); - } - - @Override - protected String inputFormat() { - return OrcInputFormat.class.getName(); - } - - @Override - protected String outputFormat() { - return OrcOutputFormat.class.getName(); - } - - @Override - protected String serdeClass() { - return OrcSerde.class.getName(); - } - -} diff --git hcatalog/core/src/test/java/org/apache/hcatalog/mapred/TestHiveHCatInputFormat.java.broken hcatalog/core/src/test/java/org/apache/hcatalog/mapred/TestHiveHCatInputFormat.java.broken deleted file mode 100644 index 082d723..0000000 --- hcatalog/core/src/test/java/org/apache/hcatalog/mapred/TestHiveHCatInputFormat.java.broken +++ /dev/null @@ -1,193 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.mapred; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.Properties; - -import junit.framework.TestCase; - -import org.apache.hadoop.hive.ql.CommandNeedRetryException; -import org.apache.hadoop.hive.ql.Driver; -import org.apache.hadoop.hive.ql.io.RCFileInputFormat; -import org.apache.hadoop.hive.ql.io.RCFileOutputFormat; -import org.apache.hcatalog.MiniCluster; -import org.apache.hcatalog.data.HCatDataCheckUtil; -import org.apache.hcatalog.mapred.HCatMapredInputFormat; -import org.apache.hcatalog.mapreduce.HCatInputFormat; -import org.apache.hcatalog.storagehandler.HCatStorageHandlerImpl; -import org.apache.pig.ExecType; -import org.apache.pig.PigServer; -import org.apache.pig.impl.util.UDFContext; - -public class TestHiveHCatInputFormat extends TestCase { - private static MiniCluster cluster = MiniCluster.buildCluster(); - private static Driver driver; - - String PTNED_TABLE = "junit_testhiveinputintegration_ptni"; - String UNPTNED_TABLE = "junit_testhiveinputintegration_noptn"; - String basicFile = "/tmp/"+PTNED_TABLE+".file"; - - public void testFromHive() throws Exception { - if (driver == null){ - driver = HCatDataCheckUtil.instantiateDriver(cluster); - } - - Properties props = new Properties(); - props.setProperty("fs.default.name", cluster.getProperties().getProperty("fs.default.name")); - String basicFileFullName = cluster.getProperties().getProperty("fs.default.name") + basicFile; - - cleanup(); - - // create source data file - HCatDataCheckUtil.generateDataFile(cluster,basicFile); - - String createPtnedTable = "(j int, s string) partitioned by (i int) " - +"stored by '"+HCatStorageHandlerImpl.class.getName()+"' tblproperties" - + "('hcat.isd'='org.apache.hcatalog.rcfile.RCFileInputDriver'," - + 
"'hcat.osd'='org.apache.hcatalog.rcfile.RCFileOutputDriver') "; - - HCatDataCheckUtil.createTable(driver,PTNED_TABLE,createPtnedTable); - - String createUnptnedTable = "(i int, j int, s string) " - +"stored by '"+HCatStorageHandlerImpl.class.getName()+"' tblproperties" - + "('hcat.isd'='org.apache.hcatalog.rcfile.RCFileInputDriver'," - + "'hcat.osd'='org.apache.hcatalog.rcfile.RCFileOutputDriver') "; - - HCatDataCheckUtil.createTable(driver,UNPTNED_TABLE,createUnptnedTable); - - - driver.run("describe extended "+UNPTNED_TABLE); - ArrayList des_values = new ArrayList(); - driver.getResults(des_values); - for (String s : des_values){ - System.err.println("du:"+s); - } - - driver.run("describe extended "+PTNED_TABLE); - ArrayList des2_values = new ArrayList(); - driver.getResults(des2_values); - for (String s : des2_values){ - System.err.println("dp:"+s); - } - - // use pig to read from source file and put into this table - - PigServer server = new PigServer(ExecType.LOCAL, props); - UDFContext.getUDFContext().setClientSystemProps(); - server.setBatchOn(); - server.registerQuery("A = load '"+basicFileFullName+"' as (i:int, j:int, s:chararray);"); - server.registerQuery("store A into '"+UNPTNED_TABLE+"' using org.apache.hcatalog.pig.HCatStorer();"); - server.executeBatch(); - - server.setBatchOn(); - server.registerQuery("A = load '"+basicFileFullName+"' as (i:int, j:int, s:chararray);"); - server.registerQuery("store A into '"+PTNED_TABLE+"' using org.apache.hcatalog.pig.HCatStorer();"); - server.executeBatch(); - - // partitioned by i - // select * from tbl; - // select j,s,i from tbl; - // select * from tbl where i = 3; - // select j,s,i from tbl where i = 3; - // select * from tbl where j = 3; - // select j,s,i from tbl where j = 3; - - ArrayList p_select_star_nofilter = HCatDataCheckUtil.formattedRun(driver, - "p_select_star_nofilter","select * from "+PTNED_TABLE); - ArrayList p_select_named_nofilter = HCatDataCheckUtil.formattedRun(driver, - 
"p_select_named_nofilter","select j,s,i from "+PTNED_TABLE); - - assertDataIdentical(p_select_star_nofilter,p_select_named_nofilter,50); - - ArrayList p_select_star_ptnfilter = HCatDataCheckUtil.formattedRun(driver, - "p_select_star_ptnfilter","select * from "+PTNED_TABLE+" where i = 3"); - ArrayList p_select_named_ptnfilter = HCatDataCheckUtil.formattedRun(driver, - "p_select_named_ptnfilter","select j,s,i from "+PTNED_TABLE+" where i = 3"); - - assertDataIdentical(p_select_star_ptnfilter,p_select_named_ptnfilter,10); - - ArrayList select_star_nonptnfilter = HCatDataCheckUtil.formattedRun(driver, - "select_star_nonptnfilter","select * from "+PTNED_TABLE+" where j = 28"); - ArrayList select_named_nonptnfilter = HCatDataCheckUtil.formattedRun(driver, - "select_named_nonptnfilter","select j,s,i from "+PTNED_TABLE+" where j = 28"); - - assertDataIdentical(select_star_nonptnfilter,select_named_nonptnfilter,1); - - // non-partitioned - // select * from tbl; - // select i,j,s from tbl; - // select * from tbl where i = 3; - // select i,j,s from tbl where i = 3; - - // select j,s,i from tbl; - // select j,s,i from tbl where i = 3; - - ArrayList select_star_nofilter = HCatDataCheckUtil.formattedRun(driver, - "select_star_nofilter","select * from "+UNPTNED_TABLE); //i,j,s select * order is diff for unptn - ArrayList select_ijs_nofilter = HCatDataCheckUtil.formattedRun(driver, - "select_ijs_nofilter","select i,j,s from "+UNPTNED_TABLE); - - assertDataIdentical(select_star_nofilter,select_ijs_nofilter,50); - - ArrayList select_star_ptnfilter = HCatDataCheckUtil.formattedRun(driver, - "select_star_ptnfilter","select * from "+UNPTNED_TABLE+" where i = 3"); //i,j,s - ArrayList select_ijs_ptnfilter = HCatDataCheckUtil.formattedRun(driver, - "select_ijs_ptnfilter","select i,j,s from "+UNPTNED_TABLE+" where i = 3"); - - assertDataIdentical(select_star_ptnfilter,select_ijs_ptnfilter,10); - - ArrayList select_jsi_nofilter = HCatDataCheckUtil.formattedRun(driver, - 
"select_jsi_nofilter","select j,s,i from "+UNPTNED_TABLE); - assertDataIdentical(p_select_named_nofilter,select_jsi_nofilter,50,true); - - ArrayList select_jsi_ptnfilter = HCatDataCheckUtil.formattedRun(driver, - "select_jsi_ptnfilter","select j,s,i from "+UNPTNED_TABLE+" where i = 3"); - assertDataIdentical(p_select_named_ptnfilter,select_jsi_ptnfilter,10,true); - - } - - private void assertDataIdentical(ArrayList result1, - ArrayList result2, int numRecords) { - assertDataIdentical(result1,result2,numRecords,false); - } - - private void assertDataIdentical(ArrayList result1, - ArrayList result2, int numRecords,boolean doSort) { - assertEquals(numRecords, result1.size()); - assertEquals(numRecords, result2.size()); - Collections.sort(result1); - Collections.sort(result2); - for (int i = 0; i < numRecords; i++){ - assertEquals(result1.get(i),result2.get(i)); - } - } - - - private void cleanup() throws IOException, CommandNeedRetryException { - MiniCluster.deleteFile(cluster, basicFile); - HCatDataCheckUtil.dropTable(driver,PTNED_TABLE); - HCatDataCheckUtil.dropTable(driver,UNPTNED_TABLE); - } - -} diff --git hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/HCatBaseTest.java hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/HCatBaseTest.java deleted file mode 100644 index a069ab5..0000000 --- hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/HCatBaseTest.java +++ /dev/null @@ -1,86 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.mapreduce; - -import org.apache.hadoop.fs.FileUtil; -import org.apache.hadoop.hive.cli.CliSessionState; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; -import org.apache.hadoop.hive.ql.Driver; -import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.pig.PigServer; -import org.junit.Assert; -import org.junit.Before; -import org.junit.BeforeClass; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.File; -import java.io.IOException; - -/** - * Simplify writing HCatalog tests that require a HiveMetaStore. 
- */ -public class HCatBaseTest { - protected static final Logger LOG = LoggerFactory.getLogger(HCatBaseTest.class); - protected static final String TEST_DATA_DIR = - "/tmp/build/test/data/" + HCatBaseTest.class.getCanonicalName(); - protected static final String TEST_WAREHOUSE_DIR = TEST_DATA_DIR + "/warehouse"; - - protected HiveConf hiveConf = null; - protected Driver driver = null; - protected HiveMetaStoreClient client = null; - - @BeforeClass - public static void setUpTestDataDir() throws Exception { - LOG.info("Using warehouse directory " + TEST_WAREHOUSE_DIR); - File f = new File(TEST_WAREHOUSE_DIR); - if (f.exists()) { - FileUtil.fullyDelete(f); - } - Assert.assertTrue(new File(TEST_WAREHOUSE_DIR).mkdirs()); - } - - @Before - public void setUp() throws Exception { - if (driver == null) { - setUpHiveConf(); - driver = new Driver(hiveConf); - client = new HiveMetaStoreClient(hiveConf); - SessionState.start(new CliSessionState(hiveConf)); - } - } - - /** - * Create a new HiveConf and set properties necessary for unit tests. 
- */ - protected void setUpHiveConf() { - hiveConf = new HiveConf(this.getClass()); - hiveConf.setVar(HiveConf.ConfVars.PREEXECHOOKS, ""); - hiveConf.setVar(HiveConf.ConfVars.POSTEXECHOOKS, ""); - hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY, false); - hiveConf.setVar(HiveConf.ConfVars.METASTOREWAREHOUSE, TEST_WAREHOUSE_DIR); - } - - protected void logAndRegister(PigServer server, String query) throws IOException { - LOG.info("Registering pig query: " + query); - server.registerQuery(query); - } -} diff --git hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/HCatMapReduceTest.java hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/HCatMapReduceTest.java deleted file mode 100644 index 283ecde..0000000 --- hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/HCatMapReduceTest.java +++ /dev/null @@ -1,383 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.mapreduce; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import junit.framework.Assert; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.LocalFileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.MetaStoreUtils; -import org.apache.hadoop.hive.metastore.TableType; -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.metastore.api.SerDeInfo; -import org.apache.hadoop.hive.metastore.api.StorageDescriptor; -import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.hadoop.hive.ql.io.RCFileInputFormat; -import org.apache.hadoop.hive.ql.io.RCFileOutputFormat; -import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe; -import org.apache.hadoop.hive.serde.serdeConstants; -import org.apache.hadoop.io.BytesWritable; -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.JobStatus; -import org.apache.hadoop.mapreduce.Mapper; -import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; -import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.common.HCatUtil; -import org.apache.hcatalog.data.DefaultHCatRecord; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.hcatalog.data.schema.HCatFieldSchema; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.junit.After; -import org.junit.Before; -import org.junit.BeforeClass; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import static org.junit.Assert.assertTrue; - -/** - * Test for 
HCatOutputFormat. Writes a partition using HCatOutputFormat and reads - * it back using HCatInputFormat, checks the column values and counts. - */ -public abstract class HCatMapReduceTest extends HCatBaseTest { - - private static final Logger LOG = LoggerFactory.getLogger(HCatMapReduceTest.class); - protected static String dbName = MetaStoreUtils.DEFAULT_DATABASE_NAME; - protected static String tableName = "testHCatMapReduceTable"; - - private static List writeRecords = new ArrayList(); - private static List readRecords = new ArrayList(); - - protected abstract List getPartitionKeys(); - - protected abstract List getTableColumns(); - - private static FileSystem fs; - - protected Boolean isTableExternal() { - return false; - } - - protected String inputFormat() { - return RCFileInputFormat.class.getName(); - } - - protected String outputFormat() { - return RCFileOutputFormat.class.getName(); - } - - protected String serdeClass() { - return ColumnarSerDe.class.getName(); - } - - @BeforeClass - public static void setUpOneTime() throws Exception { - fs = new LocalFileSystem(); - fs.initialize(fs.getWorkingDirectory().toUri(), new Configuration()); - - HiveConf hiveConf = new HiveConf(); - hiveConf.setInt(HCatConstants.HCAT_HIVE_CLIENT_EXPIRY_TIME, 0); - // Hack to initialize cache with 0 expiry time causing it to return a new hive client every time - // Otherwise the cache doesn't play well with the second test method with the client gets closed() in the - // tearDown() of the previous test - HCatUtil.getHiveClient(hiveConf); - - MapCreate.writeCount = 0; - MapRead.readCount = 0; - } - - @After - public void deleteTable() throws Exception { - try { - String databaseName = (dbName == null) ? MetaStoreUtils.DEFAULT_DATABASE_NAME : dbName; - - client.dropTable(databaseName, tableName); - } catch (Exception e) { - e.printStackTrace(); - throw e; - } - } - - @Before - public void createTable() throws Exception { - String databaseName = (dbName == null) ? 
MetaStoreUtils.DEFAULT_DATABASE_NAME : dbName; - - try { - client.dropTable(databaseName, tableName); - } catch (Exception e) { - } //can fail with NoSuchObjectException - - - Table tbl = new Table(); - tbl.setDbName(databaseName); - tbl.setTableName(tableName); - if (isTableExternal()){ - tbl.setTableType(TableType.EXTERNAL_TABLE.toString()); - } else { - tbl.setTableType(TableType.MANAGED_TABLE.toString()); - } - StorageDescriptor sd = new StorageDescriptor(); - - sd.setCols(getTableColumns()); - tbl.setPartitionKeys(getPartitionKeys()); - - tbl.setSd(sd); - - sd.setBucketCols(new ArrayList(2)); - sd.setSerdeInfo(new SerDeInfo()); - sd.getSerdeInfo().setName(tbl.getTableName()); - sd.getSerdeInfo().setParameters(new HashMap()); - sd.getSerdeInfo().getParameters().put(serdeConstants.SERIALIZATION_FORMAT, "1"); - if (isTableExternal()){ - sd.getSerdeInfo().getParameters().put("EXTERNAL", "TRUE"); - } - sd.getSerdeInfo().setSerializationLib(serdeClass()); - sd.setInputFormat(inputFormat()); - sd.setOutputFormat(outputFormat()); - - Map tableParams = new HashMap(); - tbl.setParameters(tableParams); - - client.createTable(tbl); - } - - //Create test input file with specified number of rows - private void createInputFile(Path path, int rowCount) throws IOException { - - if (fs.exists(path)) { - fs.delete(path, true); - } - - FSDataOutputStream os = fs.create(path); - - for (int i = 0; i < rowCount; i++) { - os.writeChars(i + "\n"); - } - - os.close(); - } - - public static class MapCreate extends - Mapper { - - static int writeCount = 0; //test will be in local mode - - @Override - public void map(LongWritable key, Text value, Context context - ) throws IOException, InterruptedException { - { - try { - HCatRecord rec = writeRecords.get(writeCount); - context.write(null, rec); - writeCount++; - - } catch (Exception e) { - - e.printStackTrace(System.err); //print since otherwise exception is lost - throw new IOException(e); - } - } - } - } - - public static class MapRead 
extends - Mapper { - - static int readCount = 0; //test will be in local mode - - @Override - public void map(WritableComparable key, HCatRecord value, Context context - ) throws IOException, InterruptedException { - { - try { - readRecords.add(value); - readCount++; - } catch (Exception e) { - e.printStackTrace(); //print since otherwise exception is lost - throw new IOException(e); - } - } - } - } - - Job runMRCreate(Map partitionValues, - List partitionColumns, List records, - int writeCount, boolean assertWrite) throws Exception { - return runMRCreate(partitionValues, partitionColumns, records, writeCount, assertWrite, true); - } - - /** - * Run a local map reduce job to load data from in memory records to an HCatalog Table - * @param partitionValues - * @param partitionColumns - * @param records data to be written to HCatalog table - * @param writeCount - * @param assertWrite - * @param asSingleMapTask - * @return - * @throws Exception - */ - Job runMRCreate(Map partitionValues, - List partitionColumns, List records, - int writeCount, boolean assertWrite, boolean asSingleMapTask) throws Exception { - - writeRecords = records; - MapCreate.writeCount = 0; - - Configuration conf = new Configuration(); - Job job = new Job(conf, "hcat mapreduce write test"); - job.setJarByClass(this.getClass()); - job.setMapperClass(HCatMapReduceTest.MapCreate.class); - - // input/output settings - job.setInputFormatClass(TextInputFormat.class); - - if (asSingleMapTask) { - // One input path would mean only one map task - Path path = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceInput"); - createInputFile(path, writeCount); - TextInputFormat.setInputPaths(job, path); - } else { - // Create two input paths so that two map tasks get triggered. There could be other ways - // to trigger two map tasks. 
- Path path = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceInput"); - createInputFile(path, writeCount / 2); - - Path path2 = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceInput2"); - createInputFile(path2, (writeCount - writeCount / 2)); - - TextInputFormat.setInputPaths(job, path, path2); - } - - job.setOutputFormatClass(HCatOutputFormat.class); - - OutputJobInfo outputJobInfo = OutputJobInfo.create(dbName, tableName, partitionValues); - HCatOutputFormat.setOutput(job, outputJobInfo); - - job.setMapOutputKeyClass(BytesWritable.class); - job.setMapOutputValueClass(DefaultHCatRecord.class); - - job.setNumReduceTasks(0); - - HCatOutputFormat.setSchema(job, new HCatSchema(partitionColumns)); - - boolean success = job.waitForCompletion(true); - - // Ensure counters are set when data has actually been read. - if (partitionValues != null) { - assertTrue(job.getCounters().getGroup("FileSystemCounters") - .findCounter("FILE_BYTES_READ").getValue() > 0); - } - - if (!HCatUtil.isHadoop23()) { - // Local mode outputcommitter hook is not invoked in Hadoop 1.x - if (success) { - new FileOutputCommitterContainer(job, null).commitJob(job); - } else { - new FileOutputCommitterContainer(job, null).abortJob(job, JobStatus.State.FAILED); - } - } - if (assertWrite) { - // we assert only if we expected to assert with this call. 
- Assert.assertEquals(writeCount, MapCreate.writeCount); - } - - return job; - } - - List runMRRead(int readCount) throws Exception { - return runMRRead(readCount, null); - } - - /** - * Run a local map reduce job to read records from HCatalog table and verify if the count is as expected - * @param readCount - * @param filter - * @return - * @throws Exception - */ - List runMRRead(int readCount, String filter) throws Exception { - - MapRead.readCount = 0; - readRecords.clear(); - - Configuration conf = new Configuration(); - Job job = new Job(conf, "hcat mapreduce read test"); - job.setJarByClass(this.getClass()); - job.setMapperClass(HCatMapReduceTest.MapRead.class); - - // input/output settings - job.setInputFormatClass(HCatInputFormat.class); - job.setOutputFormatClass(TextOutputFormat.class); - - HCatInputFormat.setInput(job, dbName, tableName).setFilter(filter); - - job.setMapOutputKeyClass(BytesWritable.class); - job.setMapOutputValueClass(Text.class); - - job.setNumReduceTasks(0); - - Path path = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceOutput"); - if (fs.exists(path)) { - fs.delete(path, true); - } - - TextOutputFormat.setOutputPath(job, path); - - job.waitForCompletion(true); - Assert.assertEquals(readCount, MapRead.readCount); - - return readRecords; - } - - - protected HCatSchema getTableSchema() throws Exception { - - Configuration conf = new Configuration(); - Job job = new Job(conf, "hcat mapreduce read schema test"); - job.setJarByClass(this.getClass()); - - // input/output settings - job.setInputFormatClass(HCatInputFormat.class); - job.setOutputFormatClass(TextOutputFormat.class); - - HCatInputFormat.setInput(job, dbName, tableName); - - return HCatInputFormat.getTableSchema(job); - } - -} - - - diff --git hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatDynamicPartitioned.java hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatDynamicPartitioned.java deleted file mode 100644 index af25e2d..0000000 
--- hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatDynamicPartitioned.java +++ /dev/null @@ -1,208 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.mapreduce; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - -import junit.framework.Assert; - -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.serde.serdeConstants; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hcatalog.common.HCatUtil; -import org.apache.hcatalog.common.ErrorType; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.common.HCatException; -import org.apache.hcatalog.data.DefaultHCatRecord; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.hcatalog.data.schema.HCatFieldSchema; -import org.apache.hcatalog.data.schema.HCatSchemaUtils; -import org.junit.BeforeClass; -import org.junit.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import static junit.framework.Assert.assertEquals; -import static junit.framework.Assert.assertTrue; - -public class TestHCatDynamicPartitioned extends HCatMapReduceTest { - 
- private static List writeRecords; - private static List dataColumns; - private static final Logger LOG = LoggerFactory.getLogger(TestHCatDynamicPartitioned.class); - protected static final int NUM_RECORDS = 20; - protected static final int NUM_PARTITIONS = 5; - - @BeforeClass - public static void generateInputData() throws Exception { - tableName = "testHCatDynamicPartitionedTable"; - generateWriteRecords(NUM_RECORDS, NUM_PARTITIONS, 0); - generateDataColumns(); - } - - protected static void generateDataColumns() throws HCatException { - dataColumns = new ArrayList(); - dataColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, ""))); - dataColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, ""))); - dataColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("p1", serdeConstants.STRING_TYPE_NAME, ""))); - } - - protected static void generateWriteRecords(int max, int mod, int offset) { - writeRecords = new ArrayList(); - - for (int i = 0; i < max; i++) { - List objList = new ArrayList(); - - objList.add(i); - objList.add("strvalue" + i); - objList.add(String.valueOf((i % mod) + offset)); - writeRecords.add(new DefaultHCatRecord(objList)); - } - } - - @Override - protected List getPartitionKeys() { - List fields = new ArrayList(); - fields.add(new FieldSchema("p1", serdeConstants.STRING_TYPE_NAME, "")); - return fields; - } - - @Override - protected List getTableColumns() { - List fields = new ArrayList(); - fields.add(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")); - fields.add(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "")); - return fields; - } - - /** - * Run the dynamic partitioning test but with single map task - * @throws Exception - */ - @Test - public void testHCatDynamicPartitionedTable() throws Exception { - runHCatDynamicPartitionedTable(true); - } - - /** - * Run the dynamic partitioning test but with multiple map task. 
See HCATALOG-490 - * @throws Exception - */ - @Test - public void testHCatDynamicPartitionedTableMultipleTask() throws Exception { - runHCatDynamicPartitionedTable(false); - } - - protected void runHCatDynamicPartitionedTable(boolean asSingleMapTask) throws Exception { - generateWriteRecords(NUM_RECORDS, NUM_PARTITIONS, 0); - runMRCreate(null, dataColumns, writeRecords, NUM_RECORDS, true, asSingleMapTask); - - runMRRead(NUM_RECORDS); - - //Read with partition filter - runMRRead(4, "p1 = \"0\""); - runMRRead(8, "p1 = \"1\" or p1 = \"3\""); - runMRRead(4, "p1 = \"4\""); - - // read from hive to test - - String query = "select * from " + tableName; - int retCode = driver.run(query).getResponseCode(); - - if (retCode != 0) { - throw new Exception("Error " + retCode + " running query " + query); - } - - ArrayList res = new ArrayList(); - driver.getResults(res); - assertEquals(NUM_RECORDS, res.size()); - - - //Test for duplicate publish - IOException exc = null; - try { - generateWriteRecords(NUM_RECORDS, NUM_PARTITIONS, 0); - Job job = runMRCreate(null, dataColumns, writeRecords, NUM_RECORDS, false); - - if (HCatUtil.isHadoop23()) { - Assert.assertTrue(job.isSuccessful()==false); - } - } catch (IOException e) { - exc = e; - } - - if (!HCatUtil.isHadoop23()) { - assertTrue(exc != null); - assertTrue(exc instanceof HCatException); - assertTrue("Got exception of type [" + ((HCatException) exc).getErrorType().toString() - + "] Expected ERROR_PUBLISHING_PARTITION or ERROR_MOVE_FAILED", - (ErrorType.ERROR_PUBLISHING_PARTITION == ((HCatException) exc).getErrorType()) - || (ErrorType.ERROR_MOVE_FAILED == ((HCatException) exc).getErrorType()) - ); - } - - query = "show partitions " + tableName; - retCode = driver.run(query).getResponseCode(); - if (retCode != 0) { - throw new Exception("Error " + retCode + " running query " + query); - } - res = new ArrayList(); - driver.getResults(res); - assertEquals(NUM_PARTITIONS, res.size()); - - query = "select * from " + tableName; - 
retCode = driver.run(query).getResponseCode(); - if (retCode != 0) { - throw new Exception("Error " + retCode + " running query " + query); - } - res = new ArrayList(); - driver.getResults(res); - assertEquals(NUM_RECORDS, res.size()); - } - - //TODO 1.0 miniCluster is slow this test times out, make it work -// renaming test to make test framework skip it - public void _testHCatDynamicPartitionMaxPartitions() throws Exception { - HiveConf hc = new HiveConf(this.getClass()); - - int maxParts = hiveConf.getIntVar(HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTS); - LOG.info("Max partitions allowed = {}", maxParts); - - IOException exc = null; - try { - generateWriteRecords(maxParts + 5, maxParts + 2, 10); - runMRCreate(null, dataColumns, writeRecords, maxParts + 5, false); - } catch (IOException e) { - exc = e; - } - - if (HCatConstants.HCAT_IS_DYNAMIC_MAX_PTN_CHECK_ENABLED) { - assertTrue(exc != null); - assertTrue(exc instanceof HCatException); - assertEquals(ErrorType.ERROR_TOO_MANY_DYNAMIC_PTNS, ((HCatException) exc).getErrorType()); - } else { - assertTrue(exc == null); - runMRRead(maxParts + 5); - } - } -} diff --git hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatEximInputFormat.java.broken hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatEximInputFormat.java.broken deleted file mode 100644 index 8b3e089..0000000 --- hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatEximInputFormat.java.broken +++ /dev/null @@ -1,429 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.mapreduce; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.TreeMap; - -import junit.framework.TestCase; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.LocalFileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.serde.Constants; -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; -import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; -import org.apache.hcatalog.data.DefaultHCatRecord; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.hcatalog.data.schema.HCatFieldSchema; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.apache.hcatalog.data.schema.HCatSchemaUtils; -import org.apache.hcatalog.mapreduce.TestHCatEximInputFormat.TestImport.EmpDetails; - -/** - * - * TestHCatEximInputFormat. tests primarily HCatEximInputFormat but - * also HCatEximOutputFormat. 
- * - */ -public class TestHCatEximInputFormat extends TestCase { - - public static class TestExport extends - org.apache.hadoop.mapreduce.Mapper { - - private HCatSchema recordSchema; - - @Override - protected void setup(Context context) throws IOException, - InterruptedException { - super.setup(context); - recordSchema = HCatEximOutputFormat.getTableSchema(context); - } - - @Override - public void map(LongWritable key, Text value, Context context) - throws IOException, InterruptedException { - String[] cols = value.toString().split(","); - HCatRecord record = new DefaultHCatRecord(recordSchema.size()); - record.setInteger("emp_id", recordSchema, Integer.parseInt(cols[0])); - record.setString("emp_name", recordSchema, cols[1]); - record.setString("emp_dob", recordSchema, cols[2]); - record.setString("emp_sex", recordSchema, cols[3]); - context.write(key, record); - } - } - - public static class TestImport extends - org.apache.hadoop.mapreduce.Mapper< - org.apache.hadoop.io.LongWritable, HCatRecord, - org.apache.hadoop.io.Text, - org.apache.hadoop.io.Text> { - - private HCatSchema recordSchema; - - public static class EmpDetails { - public String emp_name; - public String emp_dob; - public String emp_sex; - public String emp_country; - public String emp_state; - } - - public static Map empRecords = new TreeMap(); - - @Override - protected void setup(Context context) throws IOException, - InterruptedException { - super.setup(context); - try { - recordSchema = HCatBaseInputFormat.getOutputSchema(context); - } catch (Exception e) { - throw new IOException("Error getting outputschema from job configuration", e); - } - System.out.println("RecordSchema : " + recordSchema.toString()); - } - - @Override - public void map(LongWritable key, HCatRecord value, Context context) - throws IOException, InterruptedException { - EmpDetails empDetails = new EmpDetails(); - Integer emp_id = value.getInteger("emp_id", recordSchema); - String emp_name = value.getString("emp_name", 
recordSchema); - empDetails.emp_name = emp_name; - if (recordSchema.getPosition("emp_dob") != null) { - empDetails.emp_dob = value.getString("emp_dob", recordSchema); - } - if (recordSchema.getPosition("emp_sex") != null) { - empDetails.emp_sex = value.getString("emp_sex", recordSchema); - } - if (recordSchema.getPosition("emp_country") != null) { - empDetails.emp_country = value.getString("emp_country", recordSchema); - } - if (recordSchema.getPosition("emp_state") != null) { - empDetails.emp_state = value.getString("emp_state", recordSchema); - } - empRecords.put(emp_id, empDetails); - } - } - - private static final String dbName = "hcatEximOutputFormatTestDB"; - private static final String tblName = "hcatEximOutputFormatTestTable"; - Configuration conf; - Job job; - List columns; - HCatSchema schema; - FileSystem fs; - Path inputLocation; - Path outputLocation; - private HCatSchema partSchema; - - - @Override - protected void setUp() throws Exception { - System.out.println("Setup started"); - super.setUp(); - conf = new Configuration(); - job = new Job(conf, "test eximinputformat"); - columns = new ArrayList(); - columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_id", - Constants.INT_TYPE_NAME, ""))); - columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_name", - Constants.STRING_TYPE_NAME, ""))); - columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_dob", - Constants.STRING_TYPE_NAME, ""))); - columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_sex", - Constants.STRING_TYPE_NAME, ""))); - schema = new HCatSchema(columns); - - fs = new LocalFileSystem(); - fs.initialize(fs.getWorkingDirectory().toUri(), new Configuration()); - inputLocation = new Path(fs.getWorkingDirectory(), "tmp/exports"); - outputLocation = new Path(fs.getWorkingDirectory(), "tmp/data"); - - job.setJarByClass(this.getClass()); - job.setNumReduceTasks(0); - System.out.println("Setup done"); - } - - private void 
setupMRExport(String[] records) throws IOException { - if (fs.exists(outputLocation)) { - fs.delete(outputLocation, true); - } - FSDataOutputStream ds = fs.create(outputLocation, true); - for (String record : records) { - ds.writeBytes(record); - } - ds.close(); - job.setInputFormatClass(TextInputFormat.class); - job.setOutputFormatClass(HCatEximOutputFormat.class); - TextInputFormat.setInputPaths(job, outputLocation); - job.setMapperClass(TestExport.class); - } - - private void setupMRImport() throws IOException { - if (fs.exists(outputLocation)) { - fs.delete(outputLocation, true); - } - job.setInputFormatClass(HCatEximInputFormat.class); - job.setOutputFormatClass(TextOutputFormat.class); - TextOutputFormat.setOutputPath(job, outputLocation); - job.setMapperClass(TestImport.class); - TestImport.empRecords.clear(); - } - - - @Override - protected void tearDown() throws Exception { - System.out.println("Teardown started"); - super.tearDown(); - // fs.delete(inputLocation, true); - // fs.delete(outputLocation, true); - System.out.println("Teardown done"); - } - - - private void runNonPartExport() throws IOException, InterruptedException, ClassNotFoundException { - if (fs.exists(inputLocation)) { - fs.delete(inputLocation, true); - } - setupMRExport(new String[] { - "237,Krishna,01/01/1990,M,IN,TN\n", - "238,Kalpana,01/01/2000,F,IN,KA\n", - "239,Satya,01/01/2001,M,US,TN\n", - "240,Kavya,01/01/2002,F,US,KA\n" - - }); - HCatEximOutputFormat.setOutput( - job, - dbName, - tblName, - inputLocation.toString(), - null, - null, - schema); - - job.waitForCompletion(true); - HCatEximOutputCommitter committer = new HCatEximOutputCommitter(job,null); - committer.cleanupJob(job); - } - - private void runPartExport(String record, String country, String state) throws IOException, InterruptedException, ClassNotFoundException { - setupMRExport(new String[] {record}); - List partValues = new ArrayList(2); - partValues.add(country); - partValues.add(state); - 
HCatEximOutputFormat.setOutput( - job, - dbName, - tblName, - inputLocation.toString(), - partSchema , - partValues , - schema); - - job.waitForCompletion(true); - HCatEximOutputCommitter committer = new HCatEximOutputCommitter(job,null); - committer.cleanupJob(job); - } - - public void testNonPart() throws Exception { - try { - runNonPartExport(); - setUp(); - setupMRImport(); - HCatEximInputFormat.setInput(job, "tmp/exports", null); - job.waitForCompletion(true); - - assertEquals(4, TestImport.empRecords.size()); - assertEmpDetail(TestImport.empRecords.get(237), "Krishna", "01/01/1990", "M", null, null); - assertEmpDetail(TestImport.empRecords.get(238), "Kalpana", "01/01/2000", "F", null, null); - assertEmpDetail(TestImport.empRecords.get(239), "Satya", "01/01/2001", "M", null, null); - assertEmpDetail(TestImport.empRecords.get(240), "Kavya", "01/01/2002", "F", null, null); - } catch (Exception e) { - System.out.println("Test failed with " + e.getMessage()); - e.printStackTrace(); - throw e; - } - } - - public void testNonPartProjection() throws Exception { - try { - - runNonPartExport(); - setUp(); - setupMRImport(); - HCatEximInputFormat.setInput(job, "tmp/exports", null); - - List readColumns = new ArrayList(); - readColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_id", - Constants.INT_TYPE_NAME, ""))); - readColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_name", - Constants.STRING_TYPE_NAME, ""))); - - HCatEximInputFormat.setOutputSchema(job, new HCatSchema(readColumns)); - job.waitForCompletion(true); - - assertEquals(4, TestImport.empRecords.size()); - assertEmpDetail(TestImport.empRecords.get(237), "Krishna", null, null, null, null); - assertEmpDetail(TestImport.empRecords.get(238), "Kalpana", null, null, null, null); - assertEmpDetail(TestImport.empRecords.get(239), "Satya", null, null, null, null); - assertEmpDetail(TestImport.empRecords.get(240), "Kavya", null, null, null, null); - } catch (Exception e) { - 
System.out.println("Test failed with " + e.getMessage()); - e.printStackTrace(); - throw e; - } - } - - public void testPart() throws Exception { - try { - if (fs.exists(inputLocation)) { - fs.delete(inputLocation, true); - } - - List partKeys = new ArrayList(2); - partKeys.add(new HCatFieldSchema("emp_country", HCatFieldSchema.Type.STRING, "")); - partKeys.add(new HCatFieldSchema("emp_state", HCatFieldSchema.Type.STRING, "")); - partSchema = new HCatSchema(partKeys); - - runPartExport("237,Krishna,01/01/1990,M,IN,TN", "in", "tn"); - setUp(); - runPartExport("238,Kalpana,01/01/2000,F,IN,KA\n", "in", "ka"); - setUp(); - runPartExport("239,Satya,01/01/2001,M,US,TN\n", "us", "tn"); - setUp(); - runPartExport("240,Kavya,01/01/2002,F,US,KA\n", "us", "ka"); - - setUp(); - setupMRImport(); - HCatEximInputFormat.setInput(job, "tmp/exports", null); - job.waitForCompletion(true); - - assertEquals(4, TestImport.empRecords.size()); - assertEmpDetail(TestImport.empRecords.get(237), "Krishna", "01/01/1990", "M", "in", "tn"); - assertEmpDetail(TestImport.empRecords.get(238), "Kalpana", "01/01/2000", "F", "in", "ka"); - assertEmpDetail(TestImport.empRecords.get(239), "Satya", "01/01/2001", "M", "us", "tn"); - assertEmpDetail(TestImport.empRecords.get(240), "Kavya", "01/01/2002", "F", "us", "ka"); - } catch (Exception e) { - System.out.println("Test failed with " + e.getMessage()); - e.printStackTrace(); - throw e; - } - } - - public void testPartWithPartCols() throws Exception { - try { - if (fs.exists(inputLocation)) { - fs.delete(inputLocation, true); - } - - List partKeys = new ArrayList(2); - partKeys.add(new HCatFieldSchema("emp_country", HCatFieldSchema.Type.STRING, "")); - partKeys.add(new HCatFieldSchema("emp_state", HCatFieldSchema.Type.STRING, "")); - partSchema = new HCatSchema(partKeys); - - runPartExport("237,Krishna,01/01/1990,M,IN,TN", "in", "tn"); - setUp(); - runPartExport("238,Kalpana,01/01/2000,F,IN,KA\n", "in", "ka"); - setUp(); - 
runPartExport("239,Satya,01/01/2001,M,US,TN\n", "us", "tn"); - setUp(); - runPartExport("240,Kavya,01/01/2002,F,US,KA\n", "us", "ka"); - - setUp(); - setupMRImport(); - HCatEximInputFormat.setInput(job, "tmp/exports", null); - - List colsPlusPartKeys = new ArrayList(); - colsPlusPartKeys.addAll(columns); - colsPlusPartKeys.addAll(partKeys); - - HCatBaseInputFormat.setOutputSchema(job, new HCatSchema(colsPlusPartKeys)); - job.waitForCompletion(true); - - assertEquals(4, TestImport.empRecords.size()); - assertEmpDetail(TestImport.empRecords.get(237), "Krishna", "01/01/1990", "M", "in", "tn"); - assertEmpDetail(TestImport.empRecords.get(238), "Kalpana", "01/01/2000", "F", "in", "ka"); - assertEmpDetail(TestImport.empRecords.get(239), "Satya", "01/01/2001", "M", "us", "tn"); - assertEmpDetail(TestImport.empRecords.get(240), "Kavya", "01/01/2002", "F", "us", "ka"); - } catch (Exception e) { - System.out.println("Test failed with " + e.getMessage()); - e.printStackTrace(); - throw e; - } - } - - - public void testPartSelection() throws Exception { - try { - if (fs.exists(inputLocation)) { - fs.delete(inputLocation, true); - } - - List partKeys = new ArrayList(2); - partKeys.add(new HCatFieldSchema("emp_country", HCatFieldSchema.Type.STRING, "")); - partKeys.add(new HCatFieldSchema("emp_state", HCatFieldSchema.Type.STRING, "")); - partSchema = new HCatSchema(partKeys); - - runPartExport("237,Krishna,01/01/1990,M,IN,TN", "in", "tn"); - setUp(); - runPartExport("238,Kalpana,01/01/2000,F,IN,KA\n", "in", "ka"); - setUp(); - runPartExport("239,Satya,01/01/2001,M,US,TN\n", "us", "tn"); - setUp(); - runPartExport("240,Kavya,01/01/2002,F,US,KA\n", "us", "ka"); - - setUp(); - setupMRImport(); - Map filter = new TreeMap(); - filter.put("emp_state", "ka"); - HCatEximInputFormat.setInput(job, "tmp/exports", filter); - job.waitForCompletion(true); - - assertEquals(2, TestImport.empRecords.size()); - assertEmpDetail(TestImport.empRecords.get(238), "Kalpana", "01/01/2000", "F", "in", 
"ka"); - assertEmpDetail(TestImport.empRecords.get(240), "Kavya", "01/01/2002", "F", "us", "ka"); - } catch (Exception e) { - System.out.println("Test failed with " + e.getMessage()); - e.printStackTrace(); - throw e; - } - } - - - private void assertEmpDetail(EmpDetails empDetails, String name, String dob, String mf, String country, String state) { - assertNotNull(empDetails); - assertEquals(name, empDetails.emp_name); - assertEquals(dob, empDetails.emp_dob); - assertEquals(mf, empDetails.emp_sex); - assertEquals(country, empDetails.emp_country); - assertEquals(state, empDetails.emp_state); - } - -} diff --git hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatEximOutputFormat.java.broken hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatEximOutputFormat.java.broken deleted file mode 100644 index bf4fb48..0000000 --- hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatEximOutputFormat.java.broken +++ /dev/null @@ -1,261 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.mapreduce; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; - -import junit.framework.TestCase; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.LocalFileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.metastore.api.Partition; -import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.hadoop.hive.ql.parse.EximUtil; -import org.apache.hadoop.hive.serde.Constants; -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.Mapper; -import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.common.HCatUtil; -import org.apache.hcatalog.data.DefaultHCatRecord; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.hcatalog.data.schema.HCatFieldSchema; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.apache.hcatalog.data.schema.HCatSchemaUtils; - -/** - * - * TestHCatEximOutputFormat. Some basic testing here. 
More testing done via - * TestHCatEximInputFormat - * - */ -public class TestHCatEximOutputFormat extends TestCase { - - public static class TestMap extends - Mapper { - - private HCatSchema recordSchema; - - @Override - protected void setup(Context context) throws IOException, - InterruptedException { - super.setup(context); - recordSchema = HCatEximOutputFormat.getTableSchema(context); - System.out.println("TestMap/setup called"); - } - - @Override - public void map(LongWritable key, Text value, Context context) - throws IOException, InterruptedException { - String[] cols = value.toString().split(","); - HCatRecord record = new DefaultHCatRecord(recordSchema.size()); - System.out.println("TestMap/map called. Cols[0]:" + cols[0]); - System.out.println("TestMap/map called. Cols[1]:" + cols[1]); - System.out.println("TestMap/map called. Cols[2]:" + cols[2]); - System.out.println("TestMap/map called. Cols[3]:" + cols[3]); - record.setInteger("emp_id", recordSchema, Integer.parseInt(cols[0])); - record.setString("emp_name", recordSchema, cols[1]); - record.setString("emp_dob", recordSchema, cols[2]); - record.setString("emp_sex", recordSchema, cols[3]); - context.write(key, record); - } - } - - - private static final String dbName = "hcatEximOutputFormatTestDB"; - private static final String tblName = "hcatEximOutputFormatTestTable"; - Configuration conf; - Job job; - List columns; - HCatSchema schema; - FileSystem fs; - Path outputLocation; - Path dataLocation; - - public void testNonPart() throws Exception { - try { - HCatEximOutputFormat.setOutput( - job, - dbName, - tblName, - outputLocation.toString(), - null, - null, - schema); - - job.waitForCompletion(true); - HCatEximOutputCommitter committer = new HCatEximOutputCommitter(job,null); - committer.cleanupJob(job); - - Path metadataPath = new Path(outputLocation, "_metadata"); - Map.Entry> rv = EximUtil.readMetaData(fs, metadataPath); - Table table = rv.getKey(); - List partitions = rv.getValue(); - - 
assertEquals(dbName, table.getDbName()); - assertEquals(tblName, table.getTableName()); - assertTrue(EximUtil.schemaCompare(table.getSd().getCols(), - HCatUtil.getFieldSchemaList(columns))); - assertEquals("org.apache.hcatalog.rcfile.RCFileInputDriver", - table.getParameters().get(HCatConstants.HCAT_ISD_CLASS)); - assertEquals("org.apache.hcatalog.rcfile.RCFileOutputDriver", - table.getParameters().get(HCatConstants.HCAT_OSD_CLASS)); - assertEquals("org.apache.hadoop.hive.ql.io.RCFileInputFormat", - table.getSd().getInputFormat()); - assertEquals("org.apache.hadoop.hive.ql.io.RCFileOutputFormat", - table.getSd().getOutputFormat()); - assertEquals("org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe", - table.getSd().getSerdeInfo().getSerializationLib()); - assertEquals(0, table.getPartitionKeys().size()); - - assertEquals(0, partitions.size()); - } catch (Exception e) { - System.out.println("Test failed with " + e.getMessage()); - e.printStackTrace(); - throw e; - } - - } - - public void testPart() throws Exception { - try { - List partKeys = new ArrayList(); - partKeys.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_country", - Constants.STRING_TYPE_NAME, ""))); - partKeys.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_state", - Constants.STRING_TYPE_NAME, ""))); - HCatSchema partitionSchema = new HCatSchema(partKeys); - - List partitionVals = new ArrayList(); - partitionVals.add("IN"); - partitionVals.add("TN"); - - HCatEximOutputFormat.setOutput( - job, - dbName, - tblName, - outputLocation.toString(), - partitionSchema, - partitionVals, - schema); - - job.waitForCompletion(true); - HCatEximOutputCommitter committer = new HCatEximOutputCommitter(job,null); - committer.cleanupJob(job); - Path metadataPath = new Path(outputLocation, "_metadata"); - Map.Entry> rv = EximUtil.readMetaData(fs, metadataPath); - Table table = rv.getKey(); - List partitions = rv.getValue(); - - assertEquals(dbName, table.getDbName()); - assertEquals(tblName, 
table.getTableName()); - assertTrue(EximUtil.schemaCompare(table.getSd().getCols(), - HCatUtil.getFieldSchemaList(columns))); - assertEquals("org.apache.hcatalog.rcfile.RCFileInputDriver", - table.getParameters().get(HCatConstants.HCAT_ISD_CLASS)); - assertEquals("org.apache.hcatalog.rcfile.RCFileOutputDriver", - table.getParameters().get(HCatConstants.HCAT_OSD_CLASS)); - assertEquals("org.apache.hadoop.hive.ql.io.RCFileInputFormat", - table.getSd().getInputFormat()); - assertEquals("org.apache.hadoop.hive.ql.io.RCFileOutputFormat", - table.getSd().getOutputFormat()); - assertEquals("org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe", - table.getSd().getSerdeInfo().getSerializationLib()); - assertEquals(2, table.getPartitionKeys().size()); - List partSchema = table.getPartitionKeys(); - assertEquals("emp_country", partSchema.get(0).getName()); - assertEquals("emp_state", partSchema.get(1).getName()); - - assertEquals(1, partitions.size()); - Partition partition = partitions.get(0); - assertEquals("IN", partition.getValues().get(0)); - assertEquals("TN", partition.getValues().get(1)); - assertEquals("org.apache.hcatalog.rcfile.RCFileInputDriver", - partition.getParameters().get(HCatConstants.HCAT_ISD_CLASS)); - assertEquals("org.apache.hcatalog.rcfile.RCFileOutputDriver", - partition.getParameters().get(HCatConstants.HCAT_OSD_CLASS)); - } catch (Exception e) { - System.out.println("Test failed with " + e.getMessage()); - e.printStackTrace(); - throw e; - } - } - - @Override - protected void setUp() throws Exception { - System.out.println("Setup started"); - super.setUp(); - conf = new Configuration(); - job = new Job(conf, "test eximoutputformat"); - columns = new ArrayList(); - columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_id", - Constants.INT_TYPE_NAME, ""))); - columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_name", - Constants.STRING_TYPE_NAME, ""))); - columns.add(HCatSchemaUtils.getHCatFieldSchema(new 
FieldSchema("emp_dob", - Constants.STRING_TYPE_NAME, ""))); - columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_sex", - Constants.STRING_TYPE_NAME, ""))); - schema = new HCatSchema(columns); - - fs = new LocalFileSystem(); - fs.initialize(fs.getWorkingDirectory().toUri(), new Configuration()); - outputLocation = new Path(fs.getWorkingDirectory(), "tmp/exports"); - if (fs.exists(outputLocation)) { - fs.delete(outputLocation, true); - } - dataLocation = new Path(fs.getWorkingDirectory(), "tmp/data"); - if (fs.exists(dataLocation)) { - fs.delete(dataLocation, true); - } - FSDataOutputStream ds = fs.create(dataLocation, true); - ds.writeBytes("237,Krishna,01/01/1990,M,IN,TN\n"); - ds.writeBytes("238,Kalpana,01/01/2000,F,IN,KA\n"); - ds.writeBytes("239,Satya,01/01/2001,M,US,TN\n"); - ds.writeBytes("240,Kavya,01/01/2002,F,US,KA\n"); - ds.close(); - - job.setInputFormatClass(TextInputFormat.class); - job.setOutputFormatClass(HCatEximOutputFormat.class); - TextInputFormat.setInputPaths(job, dataLocation); - job.setJarByClass(this.getClass()); - job.setMapperClass(TestMap.class); - job.setNumReduceTasks(0); - System.out.println("Setup done"); - } - - @Override - protected void tearDown() throws Exception { - System.out.println("Teardown started"); - super.tearDown(); - fs.delete(dataLocation, true); - fs.delete(outputLocation, true); - System.out.println("Teardown done"); - } -} diff --git hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatExternalDynamicPartitioned.java hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatExternalDynamicPartitioned.java deleted file mode 100644 index ff61675..0000000 --- hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatExternalDynamicPartitioned.java +++ /dev/null @@ -1,29 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.mapreduce; - -public class TestHCatExternalDynamicPartitioned extends TestHCatDynamicPartitioned { - - @Override - protected Boolean isTableExternal() { - return true; - } - -} diff --git hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatExternalHCatNonPartitioned.java hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatExternalHCatNonPartitioned.java deleted file mode 100644 index 5fe4d3e..0000000 --- hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatExternalHCatNonPartitioned.java +++ /dev/null @@ -1,29 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.mapreduce; - -public class TestHCatExternalHCatNonPartitioned extends TestHCatNonPartitioned { - - @Override - protected Boolean isTableExternal() { - return true; - } - -} diff --git hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatExternalPartitioned.java hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatExternalPartitioned.java deleted file mode 100644 index 2fdbeab..0000000 --- hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatExternalPartitioned.java +++ /dev/null @@ -1,29 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.mapreduce; - -public class TestHCatExternalPartitioned extends TestHCatPartitioned { - - @Override - protected Boolean isTableExternal() { - return true; - } - -} diff --git hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatHiveCompatibility.java hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatHiveCompatibility.java deleted file mode 100644 index a290d3e..0000000 --- hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatHiveCompatibility.java +++ /dev/null @@ -1,129 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.mapreduce; - -import java.io.File; -import java.io.FileWriter; -import java.util.Arrays; -import java.util.Iterator; - -import junit.framework.Assert; -import org.apache.hadoop.hive.metastore.api.Partition; -import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.pig.ExecType; -import org.apache.pig.PigServer; -import org.apache.pig.data.Tuple; -import org.junit.BeforeClass; -import org.junit.Test; - -public class TestHCatHiveCompatibility extends HCatBaseTest { - private static final String INPUT_FILE_NAME = TEST_DATA_DIR + "/input.data"; - - @BeforeClass - public static void createInputData() throws Exception { - int LOOP_SIZE = 11; - File file = new File(INPUT_FILE_NAME); - file.deleteOnExit(); - FileWriter writer = new FileWriter(file); - for (int i = 0; i < LOOP_SIZE; i++) { - writer.write(i + "\t1\n"); - } - writer.close(); - } - - @Test - public void testUnpartedReadWrite() throws Exception { - - driver.run("drop table if exists junit_unparted_noisd"); - String createTable = "create table junit_unparted_noisd(a int) stored as RCFILE"; - Assert.assertEquals(0, driver.run(createTable).getResponseCode()); - - // assert that the table created has no hcat instrumentation, and that we're still able to read it. 
- Table table = client.getTable("default", "junit_unparted_noisd"); - Assert.assertTrue(table.getSd().getInputFormat().equals(HCatConstants.HIVE_RCFILE_IF_CLASS)); - - PigServer server = new PigServer(ExecType.LOCAL); - logAndRegister(server, "A = load '" + INPUT_FILE_NAME + "' as (a:int);"); - logAndRegister(server, "store A into 'default.junit_unparted_noisd' using org.apache.hcatalog.pig.HCatStorer();"); - logAndRegister(server, "B = load 'default.junit_unparted_noisd' using org.apache.hcatalog.pig.HCatLoader();"); - Iterator itr = server.openIterator("B"); - - int i = 0; - - while (itr.hasNext()) { - Tuple t = itr.next(); - Assert.assertEquals(1, t.size()); - Assert.assertEquals(t.get(0), i); - i++; - } - - Assert.assertFalse(itr.hasNext()); - Assert.assertEquals(11, i); - - // assert that the table created still has no hcat instrumentation - Table table2 = client.getTable("default", "junit_unparted_noisd"); - Assert.assertTrue(table2.getSd().getInputFormat().equals(HCatConstants.HIVE_RCFILE_IF_CLASS)); - - driver.run("drop table junit_unparted_noisd"); - } - - @Test - public void testPartedRead() throws Exception { - - driver.run("drop table if exists junit_parted_noisd"); - String createTable = "create table junit_parted_noisd(a int) partitioned by (b string) stored as RCFILE"; - Assert.assertEquals(0, driver.run(createTable).getResponseCode()); - - // assert that the table created has no hcat instrumentation, and that we're still able to read it. 
- Table table = client.getTable("default", "junit_parted_noisd"); - Assert.assertTrue(table.getSd().getInputFormat().equals(HCatConstants.HIVE_RCFILE_IF_CLASS)); - - PigServer server = new PigServer(ExecType.LOCAL); - logAndRegister(server, "A = load '" + INPUT_FILE_NAME + "' as (a:int);"); - logAndRegister(server, "store A into 'default.junit_parted_noisd' using org.apache.hcatalog.pig.HCatStorer('b=42');"); - logAndRegister(server, "B = load 'default.junit_parted_noisd' using org.apache.hcatalog.pig.HCatLoader();"); - Iterator itr = server.openIterator("B"); - - int i = 0; - - while (itr.hasNext()) { - Tuple t = itr.next(); - Assert.assertEquals(2, t.size()); // Contains explicit field "a" and partition "b". - Assert.assertEquals(t.get(0), i); - Assert.assertEquals(t.get(1), "42"); - i++; - } - - Assert.assertFalse(itr.hasNext()); - Assert.assertEquals(11, i); - - // assert that the table created still has no hcat instrumentation - Table table2 = client.getTable("default", "junit_parted_noisd"); - Assert.assertTrue(table2.getSd().getInputFormat().equals(HCatConstants.HIVE_RCFILE_IF_CLASS)); - - // assert that there is one partition present, and it had hcat instrumentation inserted when it was created. - Partition ptn = client.getPartition("default", "junit_parted_noisd", Arrays.asList("42")); - - Assert.assertNotNull(ptn); - Assert.assertTrue(ptn.getSd().getInputFormat().equals(HCatConstants.HIVE_RCFILE_IF_CLASS)); - driver.run("drop table junit_unparted_noisd"); - } -} diff --git hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatHiveThriftCompatibility.java hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatHiveThriftCompatibility.java deleted file mode 100644 index 52a4f11..0000000 --- hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatHiveThriftCompatibility.java +++ /dev/null @@ -1,116 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.mapreduce; - -import junit.framework.Assert; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.serde2.thrift.test.IntString; -import org.apache.hadoop.io.BytesWritable; -import org.apache.hadoop.io.NullWritable; -import org.apache.hadoop.io.SequenceFile; -import org.apache.pig.ExecType; -import org.apache.pig.PigServer; -import org.apache.pig.data.DataType; -import org.apache.pig.data.Tuple; -import org.apache.pig.impl.logicalLayer.schema.Schema; -import org.apache.thrift.protocol.TBinaryProtocol; -import org.apache.thrift.transport.TIOStreamTransport; -import org.junit.Before; -import org.junit.Test; - -import java.io.ByteArrayOutputStream; -import java.util.Iterator; - -public class TestHCatHiveThriftCompatibility extends HCatBaseTest { - - private boolean setUpComplete = false; - private Path intStringSeq; - - @Before - @Override - public void setUp() throws Exception { - super.setUp(); - if (setUpComplete) { - return; - } - - ByteArrayOutputStream out = new ByteArrayOutputStream(); - TIOStreamTransport transport = new TIOStreamTransport(out); - TBinaryProtocol protocol = new TBinaryProtocol(transport); - - IntString intString = new IntString(1, "one", 1); - intString.write(protocol); - BytesWritable bytesWritable = new 
BytesWritable(out.toByteArray()); - - intStringSeq = new Path(TEST_DATA_DIR + "/data/intString.seq"); - LOG.info("Creating data file: " + intStringSeq); - - SequenceFile.Writer seqFileWriter = SequenceFile.createWriter( - intStringSeq.getFileSystem(hiveConf), hiveConf, intStringSeq, - NullWritable.class, BytesWritable.class); - seqFileWriter.append(NullWritable.get(), bytesWritable); - seqFileWriter.close(); - - setUpComplete = true; - } - - /** - * Create a table with no explicit schema and ensure its correctly - * discovered from the thrift struct. - */ - @Test - public void testDynamicCols() throws Exception { - Assert.assertEquals(0, driver.run("drop table if exists test_thrift").getResponseCode()); - Assert.assertEquals(0, driver.run( - "create external table test_thrift " + - "partitioned by (year string) " + - "row format serde 'org.apache.hadoop.hive.serde2.thrift.ThriftDeserializer' " + - "with serdeproperties ( " + - " 'serialization.class'='org.apache.hadoop.hive.serde2.thrift.test.IntString', " + - " 'serialization.format'='org.apache.thrift.protocol.TBinaryProtocol') " + - "stored as" + - " inputformat 'org.apache.hadoop.mapred.SequenceFileInputFormat'" + - " outputformat 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'") - .getResponseCode()); - Assert.assertEquals(0, - driver.run("alter table test_thrift add partition (year = '2012') location '" + - intStringSeq.getParent() + "'").getResponseCode()); - - PigServer pigServer = new PigServer(ExecType.LOCAL); - pigServer.registerQuery("A = load 'test_thrift' using org.apache.hcatalog.pig.HCatLoader();"); - - Schema expectedSchema = new Schema(); - expectedSchema.add(new Schema.FieldSchema("myint", DataType.INTEGER)); - expectedSchema.add(new Schema.FieldSchema("mystring", DataType.CHARARRAY)); - expectedSchema.add(new Schema.FieldSchema("underscore_int", DataType.INTEGER)); - expectedSchema.add(new Schema.FieldSchema("year", DataType.CHARARRAY)); - - Assert.assertEquals(expectedSchema, 
pigServer.dumpSchema("A")); - - Iterator iterator = pigServer.openIterator("A"); - Tuple t = iterator.next(); - Assert.assertEquals(1, t.get(0)); - Assert.assertEquals("one", t.get(1)); - Assert.assertEquals(1, t.get(2)); - Assert.assertEquals("2012", t.get(3)); - - Assert.assertFalse(iterator.hasNext()); - } -} diff --git hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatInputFormat.java hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatInputFormat.java deleted file mode 100644 index 4c5d621..0000000 --- hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatInputFormat.java +++ /dev/null @@ -1,148 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.mapreduce; - -import junit.framework.Assert; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.serde2.thrift.test.IntString; -import org.apache.hadoop.io.BytesWritable; -import org.apache.hadoop.io.NullWritable; -import org.apache.hadoop.io.SequenceFile; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.Mapper; -import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.thrift.protocol.TBinaryProtocol; -import org.apache.thrift.transport.TIOStreamTransport; -import org.junit.Before; -import org.junit.Test; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; - -public class TestHCatInputFormat extends HCatBaseTest { - - private boolean setUpComplete = false; - - /** - * Create an input sequence file with 100 records; every 10th record is bad. - * Load this table into Hive. 
- */ - @Before - @Override - public void setUp() throws Exception { - super.setUp(); - if (setUpComplete) { - return; - } - - Path intStringSeq = new Path(TEST_DATA_DIR + "/data/intString.seq"); - LOG.info("Creating data file: " + intStringSeq); - SequenceFile.Writer seqFileWriter = SequenceFile.createWriter( - intStringSeq.getFileSystem(hiveConf), hiveConf, intStringSeq, - NullWritable.class, BytesWritable.class); - - ByteArrayOutputStream out = new ByteArrayOutputStream(); - TIOStreamTransport transport = new TIOStreamTransport(out); - TBinaryProtocol protocol = new TBinaryProtocol(transport); - - for (int i = 1; i <= 100; i++) { - if (i % 10 == 0) { - seqFileWriter.append(NullWritable.get(), new BytesWritable("bad record".getBytes())); - } else { - out.reset(); - IntString intString = new IntString(i, Integer.toString(i), i); - intString.write(protocol); - BytesWritable bytesWritable = new BytesWritable(out.toByteArray()); - seqFileWriter.append(NullWritable.get(), bytesWritable); - } - } - - seqFileWriter.close(); - - // Now let's load this file into a new Hive table. 
- Assert.assertEquals(0, driver.run("drop table if exists test_bad_records").getResponseCode()); - Assert.assertEquals(0, driver.run( - "create table test_bad_records " + - "row format serde 'org.apache.hadoop.hive.serde2.thrift.ThriftDeserializer' " + - "with serdeproperties ( " + - " 'serialization.class'='org.apache.hadoop.hive.serde2.thrift.test.IntString', " + - " 'serialization.format'='org.apache.thrift.protocol.TBinaryProtocol') " + - "stored as" + - " inputformat 'org.apache.hadoop.mapred.SequenceFileInputFormat'" + - " outputformat 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'") - .getResponseCode()); - Assert.assertEquals(0, driver.run("load data local inpath '" + intStringSeq.getParent() + - "' into table test_bad_records").getResponseCode()); - - setUpComplete = true; - } - - @Test - public void testBadRecordHandlingPasses() throws Exception { - Assert.assertTrue(runJob(0.1f)); - } - - @Test - public void testBadRecordHandlingFails() throws Exception { - Assert.assertFalse(runJob(0.01f)); - } - - private boolean runJob(float badRecordThreshold) throws Exception { - Configuration conf = new Configuration(); - - conf.setFloat(HCatConstants.HCAT_INPUT_BAD_RECORD_THRESHOLD_KEY, badRecordThreshold); - - Job job = new Job(conf); - job.setJarByClass(this.getClass()); - job.setMapperClass(MyMapper.class); - - job.setInputFormatClass(HCatInputFormat.class); - job.setOutputFormatClass(TextOutputFormat.class); - - HCatInputFormat.setInput(job, "default", "test_bad_records"); - - job.setMapOutputKeyClass(HCatRecord.class); - job.setMapOutputValueClass(HCatRecord.class); - - job.setNumReduceTasks(0); - - Path path = new Path(TEST_DATA_DIR, "test_bad_record_handling_output"); - if (path.getFileSystem(conf).exists(path)) { - path.getFileSystem(conf).delete(path, true); - } - - TextOutputFormat.setOutputPath(job, path); - - return job.waitForCompletion(true); - } - - public static class MyMapper extends Mapper { - @Override - public void 
map(NullWritable key, HCatRecord value, Context context) - throws IOException, InterruptedException { - LOG.info("HCatRecord: " + value); - context.write(NullWritable.get(), new Text(value.toString())); - } - } -} diff --git hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatMultiOutputFormat.java hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatMultiOutputFormat.java deleted file mode 100644 index b5f22af..0000000 --- hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatMultiOutputFormat.java +++ /dev/null @@ -1,430 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.mapreduce; - -import java.io.File; -import java.io.IOException; -import java.io.OutputStream; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Random; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.FileUtil; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.permission.FsPermission; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.HiveMetaStore; -import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; -import org.apache.hadoop.hive.metastore.api.SerDeInfo; -import org.apache.hadoop.hive.metastore.api.StorageDescriptor; -import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.hadoop.hive.ql.exec.FetchTask; -import org.apache.hadoop.hive.ql.exec.Utilities; -import org.apache.hadoop.hive.ql.metadata.Hive; -import org.apache.hadoop.hive.ql.metadata.Partition; -import org.apache.hadoop.hive.ql.plan.FetchWork; -import org.apache.hadoop.hive.ql.plan.PartitionDesc; -import org.apache.hadoop.hive.serde.serdeConstants; -import org.apache.hadoop.io.BytesWritable; -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapred.MiniMRCluster; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.Mapper; -import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; -import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; -import org.apache.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzer; -import org.apache.hcatalog.common.HCatException; -import org.apache.hcatalog.data.DefaultHCatRecord; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.hcatalog.data.schema.HCatFieldSchema; -import 
org.apache.hcatalog.data.schema.HCatSchema; -import org.apache.hcatalog.data.schema.HCatSchemaUtils; -import org.apache.hcatalog.mapreduce.MultiOutputFormat.JobConfigurer; -import org.junit.AfterClass; -import org.junit.Assert; -import org.junit.BeforeClass; -import org.junit.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TestHCatMultiOutputFormat { - - private static final Logger LOG = LoggerFactory.getLogger(TestHCatMultiOutputFormat.class); - - private static final String DATABASE = "default"; - private static final String[] tableNames = {"test1", "test2", "test3"}; - private static final String[] tablePerms = {"755", "750", "700"}; - private static Path warehousedir = null; - private static HashMap schemaMap = new HashMap(); - private static HiveMetaStoreClient hmsc; - private static MiniMRCluster mrCluster; - private static Configuration mrConf; - private static HiveConf hiveConf; - private static File workDir; - - private static final String msPort = "20199"; - private static Thread t; - - static { - schemaMap.put(tableNames[0], new HCatSchema(ColumnHolder.hCattest1Cols)); - schemaMap.put(tableNames[1], new HCatSchema(ColumnHolder.hCattest2Cols)); - schemaMap.put(tableNames[2], new HCatSchema(ColumnHolder.hCattest3Cols)); - } - - private static class RunMS implements Runnable { - - @Override - public void run() { - try { - String warehouseConf = HiveConf.ConfVars.METASTOREWAREHOUSE.varname + "=" - + warehousedir.toString(); - HiveMetaStore.main(new String[]{"-v", "-p", msPort, "--hiveconf", warehouseConf}); - } catch (Throwable t) { - System.err.println("Exiting. 
Got exception from metastore: " + t.getMessage()); - } - } - - } - - /** - * Private class which holds all the data for the test cases - */ - private static class ColumnHolder { - - private static ArrayList hCattest1Cols = new ArrayList(); - private static ArrayList hCattest2Cols = new ArrayList(); - private static ArrayList hCattest3Cols = new ArrayList(); - - private static ArrayList partitionCols = new ArrayList(); - private static ArrayList test1Cols = new ArrayList(); - private static ArrayList test2Cols = new ArrayList(); - private static ArrayList test3Cols = new ArrayList(); - - private static HashMap> colMapping = new HashMap>(); - - static { - try { - FieldSchema keyCol = new FieldSchema("key", serdeConstants.STRING_TYPE_NAME, ""); - test1Cols.add(keyCol); - test2Cols.add(keyCol); - test3Cols.add(keyCol); - hCattest1Cols.add(HCatSchemaUtils.getHCatFieldSchema(keyCol)); - hCattest2Cols.add(HCatSchemaUtils.getHCatFieldSchema(keyCol)); - hCattest3Cols.add(HCatSchemaUtils.getHCatFieldSchema(keyCol)); - FieldSchema valueCol = new FieldSchema("value", serdeConstants.STRING_TYPE_NAME, ""); - test1Cols.add(valueCol); - test3Cols.add(valueCol); - hCattest1Cols.add(HCatSchemaUtils.getHCatFieldSchema(valueCol)); - hCattest3Cols.add(HCatSchemaUtils.getHCatFieldSchema(valueCol)); - FieldSchema extraCol = new FieldSchema("extra", serdeConstants.STRING_TYPE_NAME, ""); - test3Cols.add(extraCol); - hCattest3Cols.add(HCatSchemaUtils.getHCatFieldSchema(extraCol)); - colMapping.put("test1", test1Cols); - colMapping.put("test2", test2Cols); - colMapping.put("test3", test3Cols); - } catch (HCatException e) { - LOG.error("Error in setting up schema fields for the table", e); - throw new RuntimeException(e); - } - } - - static { - partitionCols.add(new FieldSchema("ds", serdeConstants.STRING_TYPE_NAME, "")); - partitionCols.add(new FieldSchema("cluster", serdeConstants.STRING_TYPE_NAME, "")); - } - } - - @BeforeClass - public static void setup() throws Exception { - String 
testDir = System.getProperty("test.data.dir", "./"); - testDir = testDir + "/test_multitable_" + Math.abs(new Random().nextLong()) + "/"; - workDir = new File(new File(testDir).getCanonicalPath()); - FileUtil.fullyDelete(workDir); - workDir.mkdirs(); - - warehousedir = new Path(workDir + "/warehouse"); - - // Run hive metastore server - t = new Thread(new RunMS()); - t.start(); - - // LocalJobRunner does not work with mapreduce OutputCommitter. So need - // to use MiniMRCluster. MAPREDUCE-2350 - Configuration conf = new Configuration(true); - conf.set("yarn.scheduler.capacity.root.queues", "default"); - conf.set("yarn.scheduler.capacity.root.default.capacity", "100"); - - FileSystem fs = FileSystem.get(conf); - System.setProperty("hadoop.log.dir", new File(workDir, "/logs").getAbsolutePath()); - mrCluster = new MiniMRCluster(1, fs.getUri().toString(), 1, null, null, - new JobConf(conf)); - mrConf = mrCluster.createJobConf(); - fs.mkdirs(warehousedir); - - initializeSetup(); - } - - private static void initializeSetup() throws Exception { - - hiveConf = new HiveConf(mrConf, TestHCatMultiOutputFormat.class); - hiveConf.set("hive.metastore.local", "false"); - hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, "thrift://localhost:" + msPort); - hiveConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES, 3); - hiveConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTFAILURERETRIES, 3); - hiveConf.set(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname, - HCatSemanticAnalyzer.class.getName()); - hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); - hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); - hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); - System.setProperty(HiveConf.ConfVars.PREEXECHOOKS.varname, " "); - System.setProperty(HiveConf.ConfVars.POSTEXECHOOKS.varname, " "); - - hiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, warehousedir.toString()); - try { - hmsc = new HiveMetaStoreClient(hiveConf, null); - 
initalizeTables(); - } catch (Throwable e) { - LOG.error("Exception encountered while setting up testcase", e); - throw new Exception(e); - } finally { - hmsc.close(); - } - } - - private static void initalizeTables() throws Exception { - for (String table : tableNames) { - try { - if (hmsc.getTable(DATABASE, table) != null) { - hmsc.dropTable(DATABASE, table); - } - } catch (NoSuchObjectException ignored) { - } - } - for (int i = 0; i < tableNames.length; i++) { - createTable(tableNames[i], tablePerms[i]); - } - } - - private static void createTable(String tableName, String tablePerm) throws Exception { - Table tbl = new Table(); - tbl.setDbName(DATABASE); - tbl.setTableName(tableName); - StorageDescriptor sd = new StorageDescriptor(); - sd.setCols(ColumnHolder.colMapping.get(tableName)); - tbl.setSd(sd); - sd.setParameters(new HashMap()); - sd.setSerdeInfo(new SerDeInfo()); - sd.getSerdeInfo().setName(tbl.getTableName()); - sd.getSerdeInfo().setParameters(new HashMap()); - sd.setInputFormat(org.apache.hadoop.hive.ql.io.RCFileInputFormat.class.getName()); - sd.setOutputFormat(org.apache.hadoop.hive.ql.io.RCFileOutputFormat.class.getName()); - sd.getSerdeInfo().getParameters().put(serdeConstants.SERIALIZATION_FORMAT, "1"); - sd.getSerdeInfo().setSerializationLib( - org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe.class.getName()); - tbl.setPartitionKeys(ColumnHolder.partitionCols); - - hmsc.createTable(tbl); - FileSystem fs = FileSystem.get(mrConf); - fs.setPermission(new Path(warehousedir, tableName), new FsPermission(tablePerm)); - } - - @AfterClass - public static void tearDown() throws IOException { - FileUtil.fullyDelete(workDir); - FileSystem fs = FileSystem.get(mrConf); - if (fs.exists(warehousedir)) { - fs.delete(warehousedir, true); - } - if (mrCluster != null) { - mrCluster.shutdown(); - } - } - - /** - * Simple test case. - *
    - *
  1. Submits a mapred job which writes out one fixed line to each of the tables
  2. - *
  3. uses hive fetch task to read the data and see if it matches what was written
  4. - *
- * - * @throws Exception if any error occurs - */ - @Test - public void testOutputFormat() throws Throwable { - HashMap partitionValues = new HashMap(); - partitionValues.put("ds", "1"); - partitionValues.put("cluster", "ag"); - ArrayList infoList = new ArrayList(); - infoList.add(OutputJobInfo.create("default", tableNames[0], partitionValues)); - infoList.add(OutputJobInfo.create("default", tableNames[1], partitionValues)); - infoList.add(OutputJobInfo.create("default", tableNames[2], partitionValues)); - - Job job = new Job(hiveConf, "SampleJob"); - - job.setMapperClass(MyMapper.class); - job.setInputFormatClass(TextInputFormat.class); - job.setOutputFormatClass(MultiOutputFormat.class); - job.setNumReduceTasks(0); - - JobConfigurer configurer = MultiOutputFormat.createConfigurer(job); - - for (int i = 0; i < tableNames.length; i++) { - configurer.addOutputFormat(tableNames[i], HCatOutputFormat.class, BytesWritable.class, - HCatRecord.class); - HCatOutputFormat.setOutput(configurer.getJob(tableNames[i]), infoList.get(i)); - HCatOutputFormat.setSchema(configurer.getJob(tableNames[i]), - schemaMap.get(tableNames[i])); - } - configurer.configure(); - - Path filePath = createInputFile(); - FileInputFormat.addInputPath(job, filePath); - Assert.assertTrue(job.waitForCompletion(true)); - - ArrayList outputs = new ArrayList(); - for (String tbl : tableNames) { - outputs.add(getTableData(tbl, "default").get(0)); - } - Assert.assertEquals("Comparing output of table " + - tableNames[0] + " is not correct", outputs.get(0), "a,a,1,ag"); - Assert.assertEquals("Comparing output of table " + - tableNames[1] + " is not correct", outputs.get(1), "a,1,ag"); - Assert.assertEquals("Comparing output of table " + - tableNames[2] + " is not correct", outputs.get(2), "a,a,extra,1,ag"); - - // Check permisssion on partition dirs and files created - for (int i = 0; i < tableNames.length; i++) { - Path partitionFile = new Path(warehousedir + "/" + tableNames[i] - + 
"/ds=1/cluster=ag/part-m-00000"); - FileSystem fs = partitionFile.getFileSystem(mrConf); - Assert.assertEquals("File permissions of table " + tableNames[i] + " is not correct", - fs.getFileStatus(partitionFile).getPermission(), - new FsPermission(tablePerms[i])); - Assert.assertEquals("File permissions of table " + tableNames[i] + " is not correct", - fs.getFileStatus(partitionFile.getParent()).getPermission(), - new FsPermission(tablePerms[i])); - Assert.assertEquals("File permissions of table " + tableNames[i] + " is not correct", - fs.getFileStatus(partitionFile.getParent().getParent()).getPermission(), - new FsPermission(tablePerms[i])); - - } - LOG.info("File permissions verified"); - } - - /** - * Create a input file for map - * - * @return absolute path of the file. - * @throws IOException if any error encountered - */ - private Path createInputFile() throws IOException { - Path f = new Path(workDir + "/MultiTableInput.txt"); - FileSystem fs = FileSystem.get(mrConf); - if (fs.exists(f)) { - fs.delete(f, true); - } - OutputStream out = fs.create(f); - for (int i = 0; i < 3; i++) { - out.write("a,a\n".getBytes()); - } - out.close(); - return f; - } - - /** - * Method to fetch table data - * - * @param table table name - * @param database database - * @return list of columns in comma seperated way - * @throws Exception if any error occurs - */ - private List getTableData(String table, String database) throws Exception { - HiveConf conf = new HiveConf(); - conf.addResource("hive-site.xml"); - ArrayList results = new ArrayList(); - ArrayList temp = new ArrayList(); - Hive hive = Hive.get(conf); - org.apache.hadoop.hive.ql.metadata.Table tbl = hive.getTable(database, table); - FetchWork work; - if (!tbl.getPartCols().isEmpty()) { - List partitions = hive.getPartitions(tbl); - List partDesc = new ArrayList(); - List partLocs = new ArrayList(); - for (Partition part : partitions) { - partLocs.add(part.getLocation()); - partDesc.add(Utilities.getPartitionDesc(part)); 
- } - work = new FetchWork(partLocs, partDesc, Utilities.getTableDesc(tbl)); - work.setLimit(100); - } else { - work = new FetchWork(tbl.getDataLocation().toString(), Utilities.getTableDesc(tbl)); - } - FetchTask task = new FetchTask(); - task.setWork(work); - task.initialize(conf, null, null); - task.fetch(temp); - for (String str : temp) { - results.add(str.replace("\t", ",")); - } - return results; - } - - private static class MyMapper extends - Mapper { - - private int i = 0; - - @Override - protected void map(LongWritable key, Text value, Context context) - throws IOException, InterruptedException { - HCatRecord record = null; - String[] splits = value.toString().split(","); - switch (i) { - case 0: - record = new DefaultHCatRecord(2); - record.set(0, splits[0]); - record.set(1, splits[1]); - break; - case 1: - record = new DefaultHCatRecord(1); - record.set(0, splits[0]); - break; - case 2: - record = new DefaultHCatRecord(3); - record.set(0, splits[0]); - record.set(1, splits[1]); - record.set(2, "extra"); - break; - default: - Assert.fail("This should not happen!!!!!"); - } - MultiOutputFormat.write(tableNames[i], null, record, context); - i++; - } - } -} diff --git hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatNonPartitioned.java hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatNonPartitioned.java deleted file mode 100644 index 2fc94cc..0000000 --- hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatNonPartitioned.java +++ /dev/null @@ -1,137 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.mapreduce; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.serde.serdeConstants; -import org.apache.hcatalog.common.ErrorType; -import org.apache.hcatalog.common.HCatException; -import org.apache.hcatalog.data.DefaultHCatRecord; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.hcatalog.data.schema.HCatFieldSchema; -import org.apache.hcatalog.data.schema.HCatSchemaUtils; -import org.junit.BeforeClass; -import org.junit.Test; - -import static junit.framework.Assert.assertEquals; -import static org.junit.Assert.assertTrue; - -public class TestHCatNonPartitioned extends HCatMapReduceTest { - - private static List writeRecords; - static List partitionColumns; - - @BeforeClass - public static void oneTimeSetUp() throws Exception { - - dbName = null; //test if null dbName works ("default" is used) - tableName = "testHCatNonPartitionedTable"; - - writeRecords = new ArrayList(); - - for (int i = 0; i < 20; i++) { - List objList = new ArrayList(); - - objList.add(i); - objList.add("strvalue" + i); - writeRecords.add(new DefaultHCatRecord(objList)); - } - - partitionColumns = new ArrayList(); - partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, ""))); - partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, ""))); - } - - 
@Override - protected List getPartitionKeys() { - List fields = new ArrayList(); - //empty list, non partitioned - return fields; - } - - @Override - protected List getTableColumns() { - List fields = new ArrayList(); - fields.add(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")); - fields.add(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "")); - return fields; - } - - - @Test - public void testHCatNonPartitionedTable() throws Exception { - - Map partitionMap = new HashMap(); - runMRCreate(null, partitionColumns, writeRecords, 10, true); - - //Test for duplicate publish - IOException exc = null; - try { - runMRCreate(null, partitionColumns, writeRecords, 20, true); - } catch (IOException e) { - exc = e; - } - - assertTrue(exc != null); - assertTrue(exc instanceof HCatException); - assertEquals(ErrorType.ERROR_NON_EMPTY_TABLE, ((HCatException) exc).getErrorType()); - - //Test for publish with invalid partition key name - exc = null; - partitionMap.clear(); - partitionMap.put("px", "p1value2"); - - try { - runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true); - } catch (IOException e) { - exc = e; - } - - assertTrue(exc != null); - assertTrue(exc instanceof HCatException); - assertEquals(ErrorType.ERROR_INVALID_PARTITION_VALUES, ((HCatException) exc).getErrorType()); - - //Read should get 10 rows - runMRRead(10); - - hiveReadTest(); - } - - //Test that data inserted through hcatoutputformat is readable from hive - private void hiveReadTest() throws Exception { - - String query = "select * from " + tableName; - int retCode = driver.run(query).getResponseCode(); - - if (retCode != 0) { - throw new Exception("Error " + retCode + " running query " + query); - } - - ArrayList res = new ArrayList(); - driver.getResults(res); - assertEquals(10, res.size()); - } -} diff --git hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatOutputFormat.java hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatOutputFormat.java 
deleted file mode 100644 index 8e72e06..0000000 --- hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatOutputFormat.java +++ /dev/null @@ -1,167 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.mapreduce; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import junit.framework.TestCase; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; -import org.apache.hadoop.hive.metastore.api.Database; -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.metastore.api.Partition; -import org.apache.hadoop.hive.metastore.api.SerDeInfo; -import org.apache.hadoop.hive.metastore.api.StorageDescriptor; -import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.hadoop.hive.ql.io.RCFileInputFormat; -import org.apache.hadoop.hive.ql.io.RCFileOutputFormat; -import org.apache.hadoop.hive.serde.serdeConstants; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.OutputCommitter; -import 
org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TestHCatOutputFormat extends TestCase { - - private static final Logger LOG = LoggerFactory.getLogger(TestHCatOutputFormat.class); - private HiveMetaStoreClient client; - private HiveConf hiveConf; - - private static final String dbName = "hcatOutputFormatTestDB"; - private static final String tblName = "hcatOutputFormatTestTable"; - - @Override - protected void setUp() throws Exception { - super.setUp(); - hiveConf = new HiveConf(this.getClass()); - - try { - client = new HiveMetaStoreClient(hiveConf, null); - - initTable(); - } catch (Throwable e) { - LOG.error("Unable to open the metastore", e); - throw new Exception(e); - } - } - - @Override - protected void tearDown() throws Exception { - try { - super.tearDown(); - client.dropTable(dbName, tblName); - client.dropDatabase(dbName); - - client.close(); - } catch (Throwable e) { - LOG.error("Unable to close metastore", e); - throw new Exception(e); - } - } - - private void initTable() throws Exception { - - try { - client.dropTable(dbName, tblName); - } catch (Exception e) { - } - try { - client.dropDatabase(dbName); - } catch (Exception e) { - } - client.createDatabase(new Database(dbName, "", null, null)); - assertNotNull((client.getDatabase(dbName).getLocationUri())); - - List fields = new ArrayList(); - fields.add(new FieldSchema("colname", serdeConstants.STRING_TYPE_NAME, "")); - - Table tbl = new Table(); - tbl.setDbName(dbName); - tbl.setTableName(tblName); - StorageDescriptor sd = new StorageDescriptor(); - sd.setCols(fields); - tbl.setSd(sd); - - //sd.setLocation("hdfs://tmp"); - sd.setInputFormat(RCFileInputFormat.class.getName()); - sd.setOutputFormat(RCFileOutputFormat.class.getName()); - sd.setParameters(new HashMap()); - sd.getParameters().put("test_param_1", "Use this for comments etc"); - //sd.setBucketCols(new ArrayList(2)); - //sd.getBucketCols().add("name"); - sd.setSerdeInfo(new SerDeInfo()); - 
sd.getSerdeInfo().setName(tbl.getTableName()); - sd.getSerdeInfo().setParameters(new HashMap()); - sd.getSerdeInfo().getParameters().put(serdeConstants.SERIALIZATION_FORMAT, "1"); - sd.getSerdeInfo().setSerializationLib( - org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.class.getName()); - tbl.setPartitionKeys(fields); - - Map tableParams = new HashMap(); - tableParams.put("hcat.testarg", "testArgValue"); - - tbl.setParameters(tableParams); - - client.createTable(tbl); - Path tblPath = new Path(client.getTable(dbName, tblName).getSd().getLocation()); - assertTrue(tblPath.getFileSystem(hiveConf).mkdirs(new Path(tblPath, "colname=p1"))); - - } - - public void testSetOutput() throws Exception { - Configuration conf = new Configuration(); - Job job = new Job(conf, "test outputformat"); - - Map partitionValues = new HashMap(); - partitionValues.put("colname", "p1"); - //null server url means local mode - OutputJobInfo info = OutputJobInfo.create(dbName, tblName, partitionValues); - - HCatOutputFormat.setOutput(job, info); - OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(job); - - assertNotNull(jobInfo.getTableInfo()); - assertEquals(1, jobInfo.getPartitionValues().size()); - assertEquals("p1", jobInfo.getPartitionValues().get("colname")); - assertEquals(1, jobInfo.getTableInfo().getDataColumns().getFields().size()); - assertEquals("colname", jobInfo.getTableInfo().getDataColumns().getFields().get(0).getName()); - - publishTest(job); - } - - public void publishTest(Job job) throws Exception { - OutputCommitter committer = new FileOutputCommitterContainer(job, null); - committer.commitJob(job); - - Partition part = client.getPartition(dbName, tblName, Arrays.asList("p1")); - assertNotNull(part); - - StorerInfo storer = InternalUtil.extractStorerInfo(part.getSd(), part.getParameters()); - assertEquals(storer.getProperties().get("hcat.testarg"), "testArgValue"); - assertTrue(part.getSd().getLocation().indexOf("p1") != -1); - } -} diff --git 
hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatPartitionPublish.java hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatPartitionPublish.java deleted file mode 100644 index 320b0e9..0000000 --- hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatPartitionPublish.java +++ /dev/null @@ -1,266 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.hcatalog.mapreduce; - -import java.io.File; -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Random; - -import junit.framework.Assert; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.FileUtil; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; -import org.apache.hadoop.hive.metastore.MetaStoreUtils; -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.metastore.api.SerDeInfo; -import org.apache.hadoop.hive.metastore.api.StorageDescriptor; -import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.hadoop.hive.ql.io.RCFileInputFormat; -import org.apache.hadoop.hive.ql.io.RCFileOutputFormat; -import org.apache.hadoop.hive.serde.serdeConstants; -import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe; -import org.apache.hadoop.hive.shims.ShimLoader; -import org.apache.hadoop.io.BytesWritable; -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapred.MiniMRCluster; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.Mapper; -import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; -import org.apache.hadoop.util.Shell; -import org.apache.hcatalog.NoExitSecurityManager; -import org.apache.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzer; -import org.apache.hcatalog.data.DefaultHCatRecord; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.hcatalog.data.schema.HCatFieldSchema; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.apache.hcatalog.data.schema.HCatSchemaUtils; -import org.junit.AfterClass; -import org.junit.BeforeClass; -import 
org.junit.Test; - -public class TestHCatPartitionPublish { - private static Configuration mrConf = null; - private static FileSystem fs = null; - private static MiniMRCluster mrCluster = null; - private static boolean isServerRunning = false; - private static final int msPort = 20101; - private static HiveConf hcatConf; - private static HiveMetaStoreClient msc; - private static SecurityManager securityManager; - - @BeforeClass - public static void setup() throws Exception { - String testDir = System.getProperty("test.data.dir", "./"); - testDir = testDir + "/test_hcat_partitionpublish_" + Math.abs(new Random().nextLong()) + "/"; - File workDir = new File(new File(testDir).getCanonicalPath()); - FileUtil.fullyDelete(workDir); - workDir.mkdirs(); - Configuration conf = new Configuration(true); - conf.set("yarn.scheduler.capacity.root.queues", "default"); - conf.set("yarn.scheduler.capacity.root.default.capacity", "100"); - - fs = FileSystem.get(conf); - System.setProperty("hadoop.log.dir", new File(workDir, "/logs").getAbsolutePath()); - // LocalJobRunner does not work with mapreduce OutputCommitter. So need - // to use MiniMRCluster. 
MAPREDUCE-2350 - mrCluster = new MiniMRCluster(1, fs.getUri().toString(), 1, null, null, - new JobConf(conf)); - mrConf = mrCluster.createJobConf(); - - if (isServerRunning) { - return; - } - - MetaStoreUtils.startMetaStore(msPort, ShimLoader - .getHadoopThriftAuthBridge()); - Thread.sleep(10000); - isServerRunning = true; - securityManager = System.getSecurityManager(); - System.setSecurityManager(new NoExitSecurityManager()); - - hcatConf = new HiveConf(TestHCatPartitionPublish.class); - hcatConf.set("hive.metastore.local", "false"); - hcatConf.setVar(HiveConf.ConfVars.METASTOREURIS, "thrift://localhost:" - + msPort); - hcatConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES, 3); - hcatConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTFAILURERETRIES, 3); - hcatConf.set(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname, - HCatSemanticAnalyzer.class.getName()); - hcatConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); - hcatConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); - hcatConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, - "false"); - msc = new HiveMetaStoreClient(hcatConf, null); - System.setProperty(HiveConf.ConfVars.PREEXECHOOKS.varname, " "); - System.setProperty(HiveConf.ConfVars.POSTEXECHOOKS.varname, " "); - } - - @AfterClass - public static void tearDown() throws IOException { - if (mrCluster != null) { - mrCluster.shutdown(); - } - System.setSecurityManager(securityManager); - isServerRunning = false; - } - - @Test - public void testPartitionPublish() throws Exception { - String dbName = "default"; - String tableName = "testHCatPartitionedTable"; - createTable(null, tableName); - - Map partitionMap = new HashMap(); - partitionMap.put("part1", "p1value1"); - partitionMap.put("part0", "p0value1"); - - ArrayList hcatTableColumns = new ArrayList(); - for (FieldSchema fs : getTableColumns()) { - hcatTableColumns.add(HCatSchemaUtils.getHCatFieldSchema(fs)); - } - - runMRCreateFail(dbName, tableName, partitionMap, 
hcatTableColumns); - List ptns = msc.listPartitionNames(dbName, tableName, - (short) 10); - Assert.assertEquals(0, ptns.size()); - Table table = msc.getTable(dbName, tableName); - Assert.assertTrue(table != null); - // In Windows, we cannot remove the output directory when job fail. See - // FileOutputCommitterContainer.abortJob - if (!Shell.WINDOWS) { - Assert.assertFalse(fs.exists(new Path(table.getSd().getLocation() - + "/part1=p1value1/part0=p0value1"))); - } - } - - void runMRCreateFail( - String dbName, String tableName, Map partitionValues, - List columns) throws Exception { - - Job job = new Job(mrConf, "hcat mapreduce write fail test"); - job.setJarByClass(this.getClass()); - job.setMapperClass(TestHCatPartitionPublish.MapFail.class); - - // input/output settings - job.setInputFormatClass(TextInputFormat.class); - - Path path = new Path(fs.getWorkingDirectory(), - "mapred/testHCatMapReduceInput"); - // The write count does not matter, as the map will fail in its first - // call. 
- createInputFile(path, 5); - - TextInputFormat.setInputPaths(job, path); - job.setOutputFormatClass(HCatOutputFormat.class); - OutputJobInfo outputJobInfo = OutputJobInfo.create(dbName, tableName, - partitionValues); - HCatOutputFormat.setOutput(job, outputJobInfo); - - job.setMapOutputKeyClass(BytesWritable.class); - job.setMapOutputValueClass(DefaultHCatRecord.class); - - job.setNumReduceTasks(0); - - HCatOutputFormat.setSchema(job, new HCatSchema(columns)); - - boolean success = job.waitForCompletion(true); - Assert.assertTrue(success == false); - } - - private void createInputFile(Path path, int rowCount) throws IOException { - if (fs.exists(path)) { - fs.delete(path, true); - } - FSDataOutputStream os = fs.create(path); - for (int i = 0; i < rowCount; i++) { - os.writeChars(i + "\n"); - } - os.close(); - } - - public static class MapFail extends - Mapper { - - @Override - public void map(LongWritable key, Text value, Context context) - throws IOException, InterruptedException { - { - throw new IOException("Exception to mimic job failure."); - } - } - } - - private void createTable(String dbName, String tableName) throws Exception { - String databaseName = (dbName == null) ? 
MetaStoreUtils.DEFAULT_DATABASE_NAME - : dbName; - try { - msc.dropTable(databaseName, tableName); - } catch (Exception e) { - } // can fail with NoSuchObjectException - - Table tbl = new Table(); - tbl.setDbName(databaseName); - tbl.setTableName(tableName); - tbl.setTableType("MANAGED_TABLE"); - StorageDescriptor sd = new StorageDescriptor(); - sd.setCols(getTableColumns()); - tbl.setPartitionKeys(getPartitionKeys()); - tbl.setSd(sd); - sd.setBucketCols(new ArrayList(2)); - sd.setSerdeInfo(new SerDeInfo()); - sd.getSerdeInfo().setName(tbl.getTableName()); - sd.getSerdeInfo().setParameters(new HashMap()); - sd.getSerdeInfo().getParameters().put(serdeConstants.SERIALIZATION_FORMAT, "1"); - sd.getSerdeInfo().setSerializationLib(ColumnarSerDe.class.getName()); - sd.setInputFormat(RCFileInputFormat.class.getName()); - sd.setOutputFormat(RCFileOutputFormat.class.getName()); - - Map tableParams = new HashMap(); - tbl.setParameters(tableParams); - - msc.createTable(tbl); - } - - protected List getPartitionKeys() { - List fields = new ArrayList(); - // Defining partition names in unsorted order - fields.add(new FieldSchema("PaRT1", serdeConstants.STRING_TYPE_NAME, "")); - fields.add(new FieldSchema("part0", serdeConstants.STRING_TYPE_NAME, "")); - return fields; - } - - protected List getTableColumns() { - List fields = new ArrayList(); - fields.add(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")); - fields.add(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "")); - return fields; - } - -} diff --git hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatPartitioned.java hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatPartitioned.java deleted file mode 100644 index dd2ac10..0000000 --- hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestHCatPartitioned.java +++ /dev/null @@ -1,351 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.mapreduce; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.serde.serdeConstants; -import org.apache.hcatalog.common.ErrorType; -import org.apache.hcatalog.common.HCatException; -import org.apache.hcatalog.data.DefaultHCatRecord; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.hcatalog.data.schema.HCatFieldSchema; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.apache.hcatalog.data.schema.HCatSchemaUtils; -import org.junit.BeforeClass; -import org.junit.Test; - -import static junit.framework.Assert.assertEquals; -import static org.junit.Assert.assertTrue; - -public class TestHCatPartitioned extends HCatMapReduceTest { - - private static List writeRecords; - private static List partitionColumns; - - @BeforeClass - public static void oneTimeSetUp() throws Exception { - - tableName = "testHCatPartitionedTable"; - writeRecords = new ArrayList(); - - for (int i = 0; i < 20; i++) { - List objList = new ArrayList(); - - objList.add(i); - objList.add("strvalue" + i); - writeRecords.add(new DefaultHCatRecord(objList)); - } - - partitionColumns 
= new ArrayList(); - partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, ""))); - partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, ""))); - } - - - @Override - protected List getPartitionKeys() { - List fields = new ArrayList(); - //Defining partition names in unsorted order - fields.add(new FieldSchema("PaRT1", serdeConstants.STRING_TYPE_NAME, "")); - fields.add(new FieldSchema("part0", serdeConstants.STRING_TYPE_NAME, "")); - return fields; - } - - @Override - protected List getTableColumns() { - List fields = new ArrayList(); - fields.add(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")); - fields.add(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "")); - return fields; - } - - - @Test - public void testHCatPartitionedTable() throws Exception { - - Map partitionMap = new HashMap(); - partitionMap.put("part1", "p1value1"); - partitionMap.put("part0", "p0value1"); - - runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true); - - partitionMap.clear(); - partitionMap.put("PART1", "p1value2"); - partitionMap.put("PART0", "p0value2"); - - runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true); - - //Test for duplicate publish - IOException exc = null; - try { - runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true); - } catch (IOException e) { - exc = e; - } - - assertTrue(exc != null); - assertTrue(exc instanceof HCatException); - assertEquals(ErrorType.ERROR_DUPLICATE_PARTITION, ((HCatException) exc).getErrorType()); - - //Test for publish with invalid partition key name - exc = null; - partitionMap.clear(); - partitionMap.put("px1", "p1value2"); - partitionMap.put("px0", "p0value2"); - - try { - runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true); - } catch (IOException e) { - exc = e; - } - - assertTrue(exc != null); - assertTrue(exc instanceof HCatException); - 
assertEquals(ErrorType.ERROR_MISSING_PARTITION_KEY, ((HCatException) exc).getErrorType()); - - //Test for publish with missing partition key values - exc = null; - partitionMap.clear(); - partitionMap.put("px", "p1value2"); - - try { - runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true); - } catch (IOException e) { - exc = e; - } - - assertTrue(exc != null); - assertTrue(exc instanceof HCatException); - assertEquals(ErrorType.ERROR_INVALID_PARTITION_VALUES, ((HCatException) exc).getErrorType()); - - - //Test for null partition value map - exc = null; - try { - runMRCreate(null, partitionColumns, writeRecords, 20, false); - } catch (IOException e) { - exc = e; - } - - assertTrue(exc == null); -// assertTrue(exc instanceof HCatException); -// assertEquals(ErrorType.ERROR_PUBLISHING_PARTITION, ((HCatException) exc).getErrorType()); - // With Dynamic partitioning, this isn't an error that the keyValues specified didn't values - - //Read should get 10 + 20 rows - runMRRead(30); - - //Read with partition filter - runMRRead(10, "part1 = \"p1value1\""); - runMRRead(20, "part1 = \"p1value2\""); - runMRRead(30, "part1 = \"p1value1\" or part1 = \"p1value2\""); - runMRRead(10, "part0 = \"p0value1\""); - runMRRead(20, "part0 = \"p0value2\""); - runMRRead(30, "part0 = \"p0value1\" or part0 = \"p0value2\""); - - tableSchemaTest(); - columnOrderChangeTest(); - hiveReadTest(); - } - - - //test that new columns gets added to table schema - private void tableSchemaTest() throws Exception { - - HCatSchema tableSchema = getTableSchema(); - - assertEquals(4, tableSchema.getFields().size()); - - //Update partition schema to have 3 fields - partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c3", serdeConstants.STRING_TYPE_NAME, ""))); - - writeRecords = new ArrayList(); - - for (int i = 0; i < 20; i++) { - List objList = new ArrayList(); - - objList.add(i); - objList.add("strvalue" + i); - objList.add("str2value" + i); - - writeRecords.add(new 
DefaultHCatRecord(objList)); - } - - Map partitionMap = new HashMap(); - partitionMap.put("part1", "p1value5"); - partitionMap.put("part0", "p0value5"); - - runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true); - - tableSchema = getTableSchema(); - - //assert that c3 has got added to table schema - assertEquals(5, tableSchema.getFields().size()); - assertEquals("c1", tableSchema.getFields().get(0).getName()); - assertEquals("c2", tableSchema.getFields().get(1).getName()); - assertEquals("c3", tableSchema.getFields().get(2).getName()); - assertEquals("part1", tableSchema.getFields().get(3).getName()); - assertEquals("part0", tableSchema.getFields().get(4).getName()); - - //Test that changing column data type fails - partitionMap.clear(); - partitionMap.put("part1", "p1value6"); - partitionMap.put("part0", "p0value6"); - - partitionColumns = new ArrayList(); - partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, ""))); - partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.INT_TYPE_NAME, ""))); - - IOException exc = null; - try { - runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true); - } catch (IOException e) { - exc = e; - } - - assertTrue(exc != null); - assertTrue(exc instanceof HCatException); - assertEquals(ErrorType.ERROR_SCHEMA_TYPE_MISMATCH, ((HCatException) exc).getErrorType()); - - //Test that partition key is not allowed in data - partitionColumns = new ArrayList(); - partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, ""))); - partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, ""))); - partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c3", serdeConstants.STRING_TYPE_NAME, ""))); - partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("part1", serdeConstants.STRING_TYPE_NAME, 
""))); - - List recordsContainingPartitionCols = new ArrayList(20); - for (int i = 0; i < 20; i++) { - List objList = new ArrayList(); - - objList.add(i); - objList.add("c2value" + i); - objList.add("c3value" + i); - objList.add("p1value6"); - - recordsContainingPartitionCols.add(new DefaultHCatRecord(objList)); - } - - exc = null; - try { - runMRCreate(partitionMap, partitionColumns, recordsContainingPartitionCols, 20, true); - } catch (IOException e) { - exc = e; - } - - List records = runMRRead(20, "part1 = \"p1value6\""); - assertEquals(20, records.size()); - records = runMRRead(20, "part0 = \"p0value6\""); - assertEquals(20, records.size()); - Integer i = 0; - for (HCatRecord rec : records) { - assertEquals(5, rec.size()); - assertTrue(rec.get(0).equals(i)); - assertTrue(rec.get(1).equals("c2value" + i)); - assertTrue(rec.get(2).equals("c3value" + i)); - assertTrue(rec.get(3).equals("p1value6")); - assertTrue(rec.get(4).equals("p0value6")); - i++; - } - } - - //check behavior while change the order of columns - private void columnOrderChangeTest() throws Exception { - - HCatSchema tableSchema = getTableSchema(); - - assertEquals(5, tableSchema.getFields().size()); - - partitionColumns = new ArrayList(); - partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, ""))); - partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c3", serdeConstants.STRING_TYPE_NAME, ""))); - partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, ""))); - - - writeRecords = new ArrayList(); - - for (int i = 0; i < 10; i++) { - List objList = new ArrayList(); - - objList.add(i); - objList.add("co strvalue" + i); - objList.add("co str2value" + i); - - writeRecords.add(new DefaultHCatRecord(objList)); - } - - Map partitionMap = new HashMap(); - partitionMap.put("part1", "p1value8"); - partitionMap.put("part0", "p0value8"); - - Exception exc = null; - try { - 
runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true); - } catch (IOException e) { - exc = e; - } - - assertTrue(exc != null); - assertTrue(exc instanceof HCatException); - assertEquals(ErrorType.ERROR_SCHEMA_COLUMN_MISMATCH, ((HCatException) exc).getErrorType()); - - - partitionColumns = new ArrayList(); - partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, ""))); - partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, ""))); - - writeRecords = new ArrayList(); - - for (int i = 0; i < 10; i++) { - List objList = new ArrayList(); - - objList.add(i); - objList.add("co strvalue" + i); - - writeRecords.add(new DefaultHCatRecord(objList)); - } - - runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true); - - //Read should get 10 + 20 + 10 + 10 + 20 rows - runMRRead(70); - } - - //Test that data inserted through hcatoutputformat is readable from hive - private void hiveReadTest() throws Exception { - - String query = "select * from " + tableName; - int retCode = driver.run(query).getResponseCode(); - - if (retCode != 0) { - throw new Exception("Error " + retCode + " running query " + query); - } - - ArrayList res = new ArrayList(); - driver.getResults(res); - assertEquals(70, res.size()); - } -} diff --git hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestInputJobInfo.java hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestInputJobInfo.java deleted file mode 100644 index 9a9f432..0000000 --- hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestInputJobInfo.java +++ /dev/null @@ -1,48 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.mapreduce; - -import java.util.Properties; - -import junit.framework.Assert; - -import org.junit.Test; - -public class TestInputJobInfo extends HCatBaseTest { - - @Test - public void test4ArgCreate() throws Exception { - Properties p = new Properties(); - p.setProperty("key", "value"); - InputJobInfo jobInfo = InputJobInfo.create("Db", "Table", "Filter", p); - Assert.assertEquals("Db", jobInfo.getDatabaseName()); - Assert.assertEquals("Table", jobInfo.getTableName()); - Assert.assertEquals("Filter", jobInfo.getFilter()); - Assert.assertEquals("value", jobInfo.getProperties().getProperty("key")); - } - - @Test - public void test3ArgCreate() throws Exception { - InputJobInfo jobInfo = InputJobInfo.create("Db", "Table", "Filter"); - Assert.assertEquals("Db", jobInfo.getDatabaseName()); - Assert.assertEquals("Table", jobInfo.getTableName()); - Assert.assertEquals("Filter", jobInfo.getFilter()); - Assert.assertEquals(0, jobInfo.getProperties().size()); - } -} diff --git hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestMultiOutputFormat.java hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestMultiOutputFormat.java deleted file mode 100644 index c756207..0000000 --- hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestMultiOutputFormat.java +++ /dev/null @@ -1,334 +0,0 @@ -/** - * Licensed to the Apache Software Foundation 
(ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.mapreduce; - -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.net.URI; -import java.util.Arrays; -import java.util.List; -import java.util.Random; -import java.util.StringTokenizer; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.filecache.DistributedCache; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.FileUtil; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.IntWritable; -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.io.SequenceFile; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapred.MiniMRCluster; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.JobContext; -import org.apache.hadoop.mapreduce.Mapper; -import org.apache.hadoop.mapreduce.OutputCommitter; -import org.apache.hadoop.mapreduce.Reducer; -import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; -import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; 
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; -import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; -import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; -import org.apache.hcatalog.mapreduce.MultiOutputFormat.JobConfigurer; -import org.junit.AfterClass; -import org.junit.Assert; -import org.junit.BeforeClass; -import org.junit.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TestMultiOutputFormat { - - private static final Logger LOG = LoggerFactory.getLogger(TestMultiOutputFormat.class); - private static File workDir; - private static Configuration mrConf = null; - private static FileSystem fs = null; - private static MiniMRCluster mrCluster = null; - - @BeforeClass - public static void setup() throws IOException { - createWorkDir(); - Configuration conf = new Configuration(true); - conf.set("yarn.scheduler.capacity.root.queues", "default"); - conf.set("yarn.scheduler.capacity.root.default.capacity", "100"); - - fs = FileSystem.get(conf); - System.setProperty("hadoop.log.dir", new File(workDir, "/logs").getAbsolutePath()); - // LocalJobRunner does not work with mapreduce OutputCommitter. So need - // to use MiniMRCluster. 
MAPREDUCE-2350 - mrCluster = new MiniMRCluster(1, fs.getUri().toString(), 1, null, null, - new JobConf(conf)); - mrConf = mrCluster.createJobConf(); - } - - private static void createWorkDir() throws IOException { - String testDir = System.getProperty("test.data.dir", "./"); - testDir = testDir + "/test_multiout_" + Math.abs(new Random().nextLong()) + "/"; - workDir = new File(new File(testDir).getCanonicalPath()); - FileUtil.fullyDelete(workDir); - workDir.mkdirs(); - } - - @AfterClass - public static void tearDown() throws IOException { - if (mrCluster != null) { - mrCluster.shutdown(); - } - FileUtil.fullyDelete(workDir); - } - - /** - * A test job that reads a input file and outputs each word and the index of - * the word encountered to a text file and sequence file with different key - * values. - */ - @Test - public void testMultiOutputFormatWithoutReduce() throws Throwable { - Job job = new Job(mrConf, "MultiOutNoReduce"); - job.setMapperClass(MultiOutWordIndexMapper.class); - job.setJarByClass(this.getClass()); - job.setInputFormatClass(TextInputFormat.class); - job.setOutputFormatClass(MultiOutputFormat.class); - job.setNumReduceTasks(0); - - JobConfigurer configurer = MultiOutputFormat.createConfigurer(job); - configurer.addOutputFormat("out1", TextOutputFormat.class, IntWritable.class, Text.class); - configurer.addOutputFormat("out2", SequenceFileOutputFormat.class, Text.class, - IntWritable.class); - Path outDir = new Path(workDir.getPath(), job.getJobName()); - FileOutputFormat.setOutputPath(configurer.getJob("out1"), new Path(outDir, "out1")); - FileOutputFormat.setOutputPath(configurer.getJob("out2"), new Path(outDir, "out2")); - - String fileContent = "Hello World"; - String inputFile = createInputFile(fileContent); - FileInputFormat.setInputPaths(job, new Path(inputFile)); - - //Test for merging of configs - DistributedCache.addFileToClassPath(new Path(inputFile), job.getConfiguration(), fs); - String dummyFile = createInputFile("dummy file"); - 
DistributedCache.addFileToClassPath(new Path(dummyFile), configurer.getJob("out1") - .getConfiguration(), fs); - // duplicate of the value. Merging should remove duplicates - DistributedCache.addFileToClassPath(new Path(inputFile), configurer.getJob("out2") - .getConfiguration(), fs); - - configurer.configure(); - - // Verify if the configs are merged - Path[] fileClassPaths = DistributedCache.getFileClassPaths(job.getConfiguration()); - List fileClassPathsList = Arrays.asList(fileClassPaths); - Assert.assertTrue(fileClassPathsList.contains(new Path(inputFile))); - Assert.assertTrue(fileClassPathsList.contains(new Path(dummyFile))); - - URI[] cacheFiles = DistributedCache.getCacheFiles(job.getConfiguration()); - List cacheFilesList = Arrays.asList(cacheFiles); - Assert.assertTrue(cacheFilesList.contains(new Path(inputFile).makeQualified(fs).toUri())); - Assert.assertTrue(cacheFilesList.contains(new Path(dummyFile).makeQualified(fs).toUri())); - - Assert.assertTrue(job.waitForCompletion(true)); - - Path textOutPath = new Path(outDir, "out1/part-m-00000"); - String[] textOutput = readFully(textOutPath).split("\n"); - Path seqOutPath = new Path(outDir, "out2/part-m-00000"); - SequenceFile.Reader reader = new SequenceFile.Reader(fs, seqOutPath, mrConf); - Text key = new Text(); - IntWritable value = new IntWritable(); - String[] words = fileContent.split(" "); - Assert.assertEquals(words.length, textOutput.length); - LOG.info("Verifying file contents"); - for (int i = 0; i < words.length; i++) { - Assert.assertEquals((i + 1) + "\t" + words[i], textOutput[i]); - reader.next(key, value); - Assert.assertEquals(words[i], key.toString()); - Assert.assertEquals((i + 1), value.get()); - } - Assert.assertFalse(reader.next(key, value)); - } - - /** - * A word count test job that reads a input file and outputs the count of - * words to a text file and sequence file with different key values. 
- */ - @Test - public void testMultiOutputFormatWithReduce() throws Throwable { - Job job = new Job(mrConf, "MultiOutWithReduce"); - - job.setMapperClass(WordCountMapper.class); - job.setReducerClass(MultiOutWordCountReducer.class); - job.setJarByClass(this.getClass()); - job.setInputFormatClass(TextInputFormat.class); - job.setOutputFormatClass(MultiOutputFormat.class); - job.setMapOutputKeyClass(Text.class); - job.setMapOutputValueClass(IntWritable.class); - - JobConfigurer configurer = MultiOutputFormat.createConfigurer(job); - - configurer.addOutputFormat("out1", TextOutputFormat.class, IntWritable.class, Text.class); - configurer.addOutputFormat("out2", SequenceFileOutputFormat.class, Text.class, - IntWritable.class); - configurer.addOutputFormat("out3", NullOutputFormat.class, Text.class, - IntWritable.class); - Path outDir = new Path(workDir.getPath(), job.getJobName()); - FileOutputFormat.setOutputPath(configurer.getJob("out1"), new Path(outDir, "out1")); - FileOutputFormat.setOutputPath(configurer.getJob("out2"), new Path(outDir, "out2")); - - configurer.configure(); - - String fileContent = "Hello World Hello World World"; - String inputFile = createInputFile(fileContent); - FileInputFormat.setInputPaths(job, new Path(inputFile)); - - Assert.assertTrue(job.waitForCompletion(true)); - - Path textOutPath = new Path(outDir, "out1/part-r-00000"); - String[] textOutput = readFully(textOutPath).split("\n"); - Path seqOutPath = new Path(outDir, "out2/part-r-00000"); - SequenceFile.Reader reader = new SequenceFile.Reader(fs, seqOutPath, mrConf); - Text key = new Text(); - IntWritable value = new IntWritable(); - String[] words = "Hello World".split(" "); - Assert.assertEquals(words.length, textOutput.length); - for (int i = 0; i < words.length; i++) { - Assert.assertEquals((i + 2) + "\t" + words[i], textOutput[i]); - reader.next(key, value); - Assert.assertEquals(words[i], key.toString()); - Assert.assertEquals((i + 2), value.get()); - } - 
Assert.assertFalse(reader.next(key, value)); - - } - - - /** - * Create a file for map input - * - * @return absolute path of the file. - * @throws IOException if any error encountered - */ - private String createInputFile(String content) throws IOException { - File f = File.createTempFile("input", "txt"); - FileWriter writer = new FileWriter(f); - writer.write(content); - writer.close(); - return f.getAbsolutePath(); - } - - private String readFully(Path file) throws IOException { - FSDataInputStream in = fs.open(file); - byte[] b = new byte[in.available()]; - in.readFully(b); - in.close(); - return new String(b); - } - - private static class MultiOutWordIndexMapper extends - Mapper { - - private IntWritable index = new IntWritable(1); - private Text word = new Text(); - - @Override - protected void map(LongWritable key, Text value, Context context) - throws IOException, InterruptedException { - StringTokenizer itr = new StringTokenizer(value.toString()); - while (itr.hasMoreTokens()) { - word.set(itr.nextToken()); - MultiOutputFormat.write("out1", index, word, context); - MultiOutputFormat.write("out2", word, index, context); - index.set(index.get() + 1); - } - } - } - - private static class WordCountMapper extends - Mapper { - - private final static IntWritable one = new IntWritable(1); - private Text word = new Text(); - - @Override - protected void map(LongWritable key, Text value, Context context) - throws IOException, InterruptedException { - StringTokenizer itr = new StringTokenizer(value.toString()); - while (itr.hasMoreTokens()) { - word.set(itr.nextToken()); - context.write(word, one); - } - } - } - - private static class MultiOutWordCountReducer extends - Reducer { - - private IntWritable count = new IntWritable(); - - @Override - protected void reduce(Text word, Iterable values, Context context) - throws IOException, InterruptedException { - int sum = 0; - for (IntWritable val : values) { - sum += val.get(); - } - count.set(sum); - 
MultiOutputFormat.write("out1", count, word, context); - MultiOutputFormat.write("out2", word, count, context); - MultiOutputFormat.write("out3", word, count, context); - } - } - - private static class NullOutputFormat extends - org.apache.hadoop.mapreduce.lib.output.NullOutputFormat { - - @Override - public OutputCommitter getOutputCommitter(TaskAttemptContext context) { - return new OutputCommitter() { - public void abortTask(TaskAttemptContext taskContext) { - } - - public void cleanupJob(JobContext jobContext) { - } - - public void commitJob(JobContext jobContext) { - } - - public void commitTask(TaskAttemptContext taskContext) { - Assert.fail("needsTaskCommit is false but commitTask was called"); - } - - public boolean needsTaskCommit(TaskAttemptContext taskContext) { - return false; - } - - public void setupJob(JobContext jobContext) { - } - - public void setupTask(TaskAttemptContext taskContext) { - } - }; - } - } - -} diff --git hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestPassProperties.java hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestPassProperties.java deleted file mode 100644 index bd3a503..0000000 --- hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestPassProperties.java +++ /dev/null @@ -1,143 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.mapreduce; - -import static org.junit.Assert.assertTrue; - -import java.io.File; -import java.io.IOException; -import java.util.ArrayList; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.cli.CliSessionState; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.MetaStoreUtils; -import org.apache.hadoop.hive.ql.Driver; -import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.io.NullWritable; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.Mapper; -import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; -import org.apache.hcatalog.HcatTestUtils; -import org.apache.hcatalog.common.HCatException; -import org.apache.hcatalog.data.DefaultHCatRecord; -import org.apache.hcatalog.data.schema.HCatFieldSchema; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.apache.pig.ExecType; -import org.apache.pig.PigServer; -import org.junit.Test; - -public class TestPassProperties { - private static final String TEST_DATA_DIR = System.getProperty("user.dir") + - "/build/test/data/" + TestSequenceFileReadWrite.class.getCanonicalName(); - private static final String TEST_WAREHOUSE_DIR = TEST_DATA_DIR + "/warehouse"; - private static final String INPUT_FILE_NAME = TEST_DATA_DIR + "/input.data"; - - private static Driver driver; - private static PigServer server; - private static String[] input; - private static HiveConf hiveConf; - - public void Initialize() throws Exception { - hiveConf = new HiveConf(this.getClass()); - hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); - hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); - hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); - 
hiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, TEST_WAREHOUSE_DIR); - driver = new Driver(hiveConf); - SessionState.start(new CliSessionState(hiveConf)); - - new File(TEST_WAREHOUSE_DIR).mkdirs(); - - int numRows = 3; - input = new String[numRows]; - for (int i = 0; i < numRows; i++) { - String col1 = "a" + i; - String col2 = "b" + i; - input[i] = i + "," + col1 + "," + col2; - } - HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); - server = new PigServer(ExecType.LOCAL); - } - - @Test - public void testSequenceTableWriteReadMR() throws Exception { - Initialize(); - String createTable = "CREATE TABLE bad_props_table(a0 int, a1 String, a2 String) STORED AS SEQUENCEFILE"; - driver.run("drop table bad_props_table"); - int retCode1 = driver.run(createTable).getResponseCode(); - assertTrue(retCode1 == 0); - - boolean caughtException = false; - try { - Configuration conf = new Configuration(); - conf.set("hive.metastore.uris", "thrift://no.such.machine:10888"); - conf.set("hive.metastore.local", "false"); - Job job = new Job(conf, "Write-hcat-seq-table"); - job.setJarByClass(TestSequenceFileReadWrite.class); - - job.setMapperClass(Map.class); - job.setOutputKeyClass(NullWritable.class); - job.setOutputValueClass(DefaultHCatRecord.class); - job.setInputFormatClass(TextInputFormat.class); - TextInputFormat.setInputPaths(job, INPUT_FILE_NAME); - - HCatOutputFormat.setOutput(job, OutputJobInfo.create( - MetaStoreUtils.DEFAULT_DATABASE_NAME, "bad_props_table", null)); - job.setOutputFormatClass(HCatOutputFormat.class); - HCatOutputFormat.setSchema(job, getSchema()); - job.setNumReduceTasks(0); - assertTrue(job.waitForCompletion(true)); - new FileOutputCommitterContainer(job, null).cleanupJob(job); - } catch (Exception e) { - caughtException = true; - assertTrue(e.getMessage().contains( - "Could not connect to meta store using any of the URIs provided")); - } - assertTrue(caughtException); - } - - public static class Map extends Mapper { - - public void 
map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { - String[] cols = value.toString().split(","); - DefaultHCatRecord record = new DefaultHCatRecord(3); - record.set(0, Integer.parseInt(cols[0])); - record.set(1, cols[1]); - record.set(2, cols[2]); - context.write(NullWritable.get(), record); - } - } - - private HCatSchema getSchema() throws HCatException { - HCatSchema schema = new HCatSchema(new ArrayList()); - schema.append(new HCatFieldSchema("a0", HCatFieldSchema.Type.INT, - "")); - schema.append(new HCatFieldSchema("a1", - HCatFieldSchema.Type.STRING, "")); - schema.append(new HCatFieldSchema("a2", - HCatFieldSchema.Type.STRING, "")); - return schema; - } - - -} diff --git hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestSequenceFileReadWrite.java hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestSequenceFileReadWrite.java deleted file mode 100644 index 999069b..0000000 --- hcatalog/core/src/test/java/org/apache/hcatalog/mapreduce/TestSequenceFileReadWrite.java +++ /dev/null @@ -1,265 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.mapreduce; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; - -import java.io.File; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Iterator; - -import junit.framework.TestCase; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.cli.CliSessionState; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.MetaStoreUtils; -import org.apache.hadoop.hive.ql.Driver; -import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.io.NullWritable; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.Mapper; -import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; -import org.apache.hcatalog.HcatTestUtils; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.common.HCatException; -import org.apache.hcatalog.common.HCatUtil; -import org.apache.hcatalog.data.DefaultHCatRecord; -import org.apache.hcatalog.data.schema.HCatFieldSchema; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.apache.pig.ExecType; -import org.apache.pig.PigServer; -import org.apache.pig.data.Tuple; -import org.junit.Test; - -public class TestSequenceFileReadWrite extends TestCase { - private static final String TEST_DATA_DIR = - "/tmp/build/test/data/" + TestSequenceFileReadWrite.class.getCanonicalName(); - private static final String TEST_WAREHOUSE_DIR = TEST_DATA_DIR + "/warehouse"; - private static final String INPUT_FILE_NAME = TEST_DATA_DIR + "/input.data"; - - private static Driver driver; - private static PigServer server; - private static String[] input; - private static HiveConf hiveConf; - - public void Initialize() throws Exception { - hiveConf = new HiveConf(this.getClass()); - hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); - 
hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); - hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); - hiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, TEST_WAREHOUSE_DIR); - driver = new Driver(hiveConf); - SessionState.start(new CliSessionState(hiveConf)); - - new File(TEST_WAREHOUSE_DIR).mkdirs(); - - int numRows = 3; - input = new String[numRows]; - for (int i = 0; i < numRows; i++) { - String col1 = "a" + i; - String col2 = "b" + i; - input[i] = i + "," + col1 + "," + col2; - } - HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); - server = new PigServer(ExecType.LOCAL); - } - - @Test - public void testSequenceTableWriteRead() throws Exception { - Initialize(); - String createTable = "CREATE TABLE demo_table(a0 int, a1 String, a2 String) STORED AS SEQUENCEFILE"; - driver.run("drop table demo_table"); - int retCode1 = driver.run(createTable).getResponseCode(); - assertTrue(retCode1 == 0); - - server.setBatchOn(); - server.registerQuery("A = load '" - + INPUT_FILE_NAME - + "' using PigStorage(',') as (a0:int,a1:chararray,a2:chararray);"); - server.registerQuery("store A into 'demo_table' using org.apache.hcatalog.pig.HCatStorer();"); - server.executeBatch(); - - server.registerQuery("B = load 'demo_table' using org.apache.hcatalog.pig.HCatLoader();"); - Iterator XIter = server.openIterator("B"); - int numTuplesRead = 0; - while (XIter.hasNext()) { - Tuple t = XIter.next(); - assertEquals(3, t.size()); - assertEquals(t.get(0).toString(), "" + numTuplesRead); - assertEquals(t.get(1).toString(), "a" + numTuplesRead); - assertEquals(t.get(2).toString(), "b" + numTuplesRead); - numTuplesRead++; - } - assertEquals(input.length, numTuplesRead); - } - - @Test - public void testTextTableWriteRead() throws Exception { - Initialize(); - String createTable = "CREATE TABLE demo_table_1(a0 int, a1 String, a2 String) STORED AS TEXTFILE"; - driver.run("drop table demo_table_1"); - int retCode1 = 
driver.run(createTable).getResponseCode(); - assertTrue(retCode1 == 0); - - server.setBatchOn(); - server.registerQuery("A = load '" - + INPUT_FILE_NAME - + "' using PigStorage(',') as (a0:int,a1:chararray,a2:chararray);"); - server.registerQuery("store A into 'demo_table_1' using org.apache.hcatalog.pig.HCatStorer();"); - server.executeBatch(); - - server.registerQuery("B = load 'demo_table_1' using org.apache.hcatalog.pig.HCatLoader();"); - Iterator XIter = server.openIterator("B"); - int numTuplesRead = 0; - while (XIter.hasNext()) { - Tuple t = XIter.next(); - assertEquals(3, t.size()); - assertEquals(t.get(0).toString(), "" + numTuplesRead); - assertEquals(t.get(1).toString(), "a" + numTuplesRead); - assertEquals(t.get(2).toString(), "b" + numTuplesRead); - numTuplesRead++; - } - assertEquals(input.length, numTuplesRead); - } - - @Test - public void testSequenceTableWriteReadMR() throws Exception { - Initialize(); - String createTable = "CREATE TABLE demo_table_2(a0 int, a1 String, a2 String) STORED AS SEQUENCEFILE"; - driver.run("drop table demo_table_2"); - int retCode1 = driver.run(createTable).getResponseCode(); - assertTrue(retCode1 == 0); - - Configuration conf = new Configuration(); - conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, - HCatUtil.serialize(hiveConf.getAllProperties())); - Job job = new Job(conf, "Write-hcat-seq-table"); - job.setJarByClass(TestSequenceFileReadWrite.class); - - job.setMapperClass(Map.class); - job.setOutputKeyClass(NullWritable.class); - job.setOutputValueClass(DefaultHCatRecord.class); - job.setInputFormatClass(TextInputFormat.class); - TextInputFormat.setInputPaths(job, INPUT_FILE_NAME); - - HCatOutputFormat.setOutput(job, OutputJobInfo.create( - MetaStoreUtils.DEFAULT_DATABASE_NAME, "demo_table_2", null)); - job.setOutputFormatClass(HCatOutputFormat.class); - HCatOutputFormat.setSchema(job, getSchema()); - job.setNumReduceTasks(0); - assertTrue(job.waitForCompletion(true)); - if (!HCatUtil.isHadoop23()) { - new 
FileOutputCommitterContainer(job, null).commitJob(job); - } - assertTrue(job.isSuccessful()); - - server.setBatchOn(); - server.registerQuery("C = load 'default.demo_table_2' using org.apache.hcatalog.pig.HCatLoader();"); - server.executeBatch(); - Iterator XIter = server.openIterator("C"); - int numTuplesRead = 0; - while (XIter.hasNext()) { - Tuple t = XIter.next(); - assertEquals(3, t.size()); - assertEquals(t.get(0).toString(), "" + numTuplesRead); - assertEquals(t.get(1).toString(), "a" + numTuplesRead); - assertEquals(t.get(2).toString(), "b" + numTuplesRead); - numTuplesRead++; - } - assertEquals(input.length, numTuplesRead); - } - - @Test - public void testTextTableWriteReadMR() throws Exception { - Initialize(); - String createTable = "CREATE TABLE demo_table_3(a0 int, a1 String, a2 String) STORED AS TEXTFILE"; - driver.run("drop table demo_table_3"); - int retCode1 = driver.run(createTable).getResponseCode(); - assertTrue(retCode1 == 0); - - Configuration conf = new Configuration(); - conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, - HCatUtil.serialize(hiveConf.getAllProperties())); - Job job = new Job(conf, "Write-hcat-text-table"); - job.setJarByClass(TestSequenceFileReadWrite.class); - - job.setMapperClass(Map.class); - job.setOutputKeyClass(NullWritable.class); - job.setOutputValueClass(DefaultHCatRecord.class); - job.setInputFormatClass(TextInputFormat.class); - job.setNumReduceTasks(0); - TextInputFormat.setInputPaths(job, INPUT_FILE_NAME); - - HCatOutputFormat.setOutput(job, OutputJobInfo.create( - MetaStoreUtils.DEFAULT_DATABASE_NAME, "demo_table_3", null)); - job.setOutputFormatClass(HCatOutputFormat.class); - HCatOutputFormat.setSchema(job, getSchema()); - assertTrue(job.waitForCompletion(true)); - if (!HCatUtil.isHadoop23()) { - new FileOutputCommitterContainer(job, null).commitJob(job); - } - assertTrue(job.isSuccessful()); - - server.setBatchOn(); - server.registerQuery("D = load 'default.demo_table_3' using 
org.apache.hcatalog.pig.HCatLoader();"); - server.executeBatch(); - Iterator XIter = server.openIterator("D"); - int numTuplesRead = 0; - while (XIter.hasNext()) { - Tuple t = XIter.next(); - assertEquals(3, t.size()); - assertEquals(t.get(0).toString(), "" + numTuplesRead); - assertEquals(t.get(1).toString(), "a" + numTuplesRead); - assertEquals(t.get(2).toString(), "b" + numTuplesRead); - numTuplesRead++; - } - assertEquals(input.length, numTuplesRead); - } - - - public static class Map extends Mapper { - - public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { - String[] cols = value.toString().split(","); - DefaultHCatRecord record = new DefaultHCatRecord(3); - record.set(0, Integer.parseInt(cols[0])); - record.set(1, cols[1]); - record.set(2, cols[2]); - context.write(NullWritable.get(), record); - } - } - - private HCatSchema getSchema() throws HCatException { - HCatSchema schema = new HCatSchema(new ArrayList()); - schema.append(new HCatFieldSchema("a0", HCatFieldSchema.Type.INT, - "")); - schema.append(new HCatFieldSchema("a1", - HCatFieldSchema.Type.STRING, "")); - schema.append(new HCatFieldSchema("a2", - HCatFieldSchema.Type.STRING, "")); - return schema; - } - -} diff --git hcatalog/core/src/test/java/org/apache/hcatalog/rcfile/TestRCFileInputStorageDriver.java.broken hcatalog/core/src/test/java/org/apache/hcatalog/rcfile/TestRCFileInputStorageDriver.java.broken deleted file mode 100644 index 3942b07..0000000 --- hcatalog/core/src/test/java/org/apache/hcatalog/rcfile/TestRCFileInputStorageDriver.java.broken +++ /dev/null @@ -1,294 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.rcfile; - -import java.io.IOException; -import java.io.UnsupportedEncodingException; -import java.util.*; - -import junit.framework.Assert; -import junit.framework.TestCase; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.ql.io.RCFile; -import org.apache.hadoop.hive.ql.io.RCFileOutputFormat; -import org.apache.hadoop.hive.serde.Constants; -import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; -import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable; -import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable; -import org.apache.hadoop.io.compress.DefaultCodec; -import org.apache.hadoop.mapreduce.InputFormat; -import org.apache.hadoop.mapreduce.InputSplit; -import org.apache.hadoop.mapreduce.JobContext; -import org.apache.hadoop.mapreduce.JobID; -import org.apache.hadoop.mapreduce.RecordReader; -import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.hadoop.mapreduce.TaskAttemptID; -import org.apache.hcatalog.common.HCatException; -import org.apache.hcatalog.common.HCatUtil; -import org.apache.hcatalog.data.DefaultHCatRecord; -import org.apache.hcatalog.data.HCatDataCheckUtil; -import org.apache.hcatalog.data.HCatRecord; -import 
org.apache.hcatalog.data.schema.HCatSchema; -import org.apache.hcatalog.rcfile.RCFileInputDriver; -import org.apache.hcatalog.shims.HCatHadoopShims; - - -public class TestRCFileInputStorageDriver extends TestCase{ - private static final Configuration conf = new Configuration(); - private static final Path dir = new Path(System.getProperty("test.data.dir", ".") + "/mapred"); - private static final Path file = new Path(dir, "test_rcfile"); - private final HCatHadoopShims shim = HCatHadoopShims.Instance.get(); - - // Generate sample records to compare against - private byte[][][] getRecords() throws UnsupportedEncodingException { - byte[][] record_1 = {"123".getBytes("UTF-8"), "456".getBytes("UTF-8"), - "789".getBytes("UTF-8"), "1000".getBytes("UTF-8"), - "5.3".getBytes("UTF-8"), "hcatalog and hadoop".getBytes("UTF-8"), - new byte[0], "\\N".getBytes("UTF-8")}; - byte[][] record_2 = {"100".getBytes("UTF-8"), "200".getBytes("UTF-8"), - "123".getBytes("UTF-8"), "1000".getBytes("UTF-8"), - "5.3".getBytes("UTF-8"), "hcatalog and hadoop".getBytes("UTF-8"), - new byte[0], "\\N".getBytes("UTF-8")}; - return new byte[][][]{record_1, record_2}; - } - - // Write sample records to file for individual tests - private BytesRefArrayWritable[] initTestEnvironment() throws IOException { - FileSystem fs = FileSystem.getLocal(conf); - fs.delete(file, true); - - byte [][][] records = getRecords(); - RCFileOutputFormat.setColumnNumber(conf, 8); - RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null, new DefaultCodec()); - - BytesRefArrayWritable bytes = writeBytesToFile(records[0], writer); - BytesRefArrayWritable bytes2 = writeBytesToFile(records[1], writer); - - writer.close(); - return new BytesRefArrayWritable[]{bytes,bytes2}; - } - - private BytesRefArrayWritable writeBytesToFile(byte[][] record, RCFile.Writer writer) throws IOException { - BytesRefArrayWritable bytes = new BytesRefArrayWritable(record.length); - for (int i = 0; i < record.length; i++) { - BytesRefWritable 
cu = new BytesRefWritable(record[i], 0, record[i].length); - bytes.set(i, cu); - } - writer.append(bytes); - return bytes; - } - - public void testConvertValueToTuple() throws IOException,InterruptedException{ - BytesRefArrayWritable[] bytesArr = initTestEnvironment(); - - HCatSchema schema = buildHiveSchema(); - RCFileInputDriver sd = new RCFileInputDriver(); - JobContext jc = shim.createJobContext(conf, new JobID()); - sd.setInputPath(jc, file.toString()); - InputFormat iF = sd.getInputFormat(null); - InputSplit split = iF.getSplits(jc).get(0); - sd.setOriginalSchema(jc, schema); - sd.setOutputSchema(jc, schema); - sd.initialize(jc, getProps()); - - TaskAttemptContext tac = shim.createTaskAttemptContext(conf, new TaskAttemptID()); - RecordReader rr = iF.createRecordReader(split,tac); - rr.initialize(split, tac); - HCatRecord[] tuples = getExpectedRecords(); - for(int j=0; j < 2; j++){ - Assert.assertTrue(rr.nextKeyValue()); - BytesRefArrayWritable w = (BytesRefArrayWritable)rr.getCurrentValue(); - Assert.assertEquals(bytesArr[j], w); - HCatRecord t = sd.convertToHCatRecord(null,w); - Assert.assertEquals(8, t.size()); - Assert.assertTrue(HCatDataCheckUtil.recordsEqual(t,tuples[j])); - } - } - - public void testPruning() throws IOException,InterruptedException{ - BytesRefArrayWritable[] bytesArr = initTestEnvironment(); - - RCFileInputDriver sd = new RCFileInputDriver(); - JobContext jc = shim.createJobContext(conf, new JobID()); - sd.setInputPath(jc, file.toString()); - InputFormat iF = sd.getInputFormat(null); - InputSplit split = iF.getSplits(jc).get(0); - sd.setOriginalSchema(jc, buildHiveSchema()); - sd.setOutputSchema(jc, buildPrunedSchema()); - - sd.initialize(jc, getProps()); - conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR,jc.getConfiguration().get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR)); - TaskAttemptContext tac = shim.createTaskAttemptContext(conf, new TaskAttemptID()); - RecordReader rr = iF.createRecordReader(split,tac); - 
rr.initialize(split, tac); - HCatRecord[] tuples = getPrunedRecords(); - for(int j=0; j < 2; j++){ - Assert.assertTrue(rr.nextKeyValue()); - BytesRefArrayWritable w = (BytesRefArrayWritable)rr.getCurrentValue(); - Assert.assertFalse(bytesArr[j].equals(w)); - Assert.assertEquals(w.size(), 8); - HCatRecord t = sd.convertToHCatRecord(null,w); - Assert.assertEquals(5, t.size()); - Assert.assertTrue(HCatDataCheckUtil.recordsEqual(t,tuples[j])); - } - assertFalse(rr.nextKeyValue()); - } - - public void testReorderdCols() throws IOException,InterruptedException{ - BytesRefArrayWritable[] bytesArr = initTestEnvironment(); - - RCFileInputDriver sd = new RCFileInputDriver(); - JobContext jc = shim.createJobContext(conf, new JobID()); - sd.setInputPath(jc, file.toString()); - InputFormat iF = sd.getInputFormat(null); - InputSplit split = iF.getSplits(jc).get(0); - sd.setOriginalSchema(jc, buildHiveSchema()); - sd.setOutputSchema(jc, buildReorderedSchema()); - - sd.initialize(jc, getProps()); - Map map = new HashMap(1); - map.put("part1", "first-part"); - sd.setPartitionValues(jc, map); - conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR,jc.getConfiguration().get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR)); - TaskAttemptContext tac = shim.createTaskAttemptContext(conf, new TaskAttemptID()); - RecordReader rr = iF.createRecordReader(split,tac); - rr.initialize(split, tac); - HCatRecord[] tuples = getReorderedCols(); - for(int j=0; j < 2; j++){ - Assert.assertTrue(rr.nextKeyValue()); - BytesRefArrayWritable w = (BytesRefArrayWritable)rr.getCurrentValue(); - Assert.assertFalse(bytesArr[j].equals(w)); - Assert.assertEquals(w.size(), 8); - HCatRecord t = sd.convertToHCatRecord(null,w); - Assert.assertEquals(7, t.size()); - Assert.assertTrue(HCatDataCheckUtil.recordsEqual(t,tuples[j])); - } - assertFalse(rr.nextKeyValue()); - } - private HCatRecord[] getExpectedRecords(){ - List rec_1 = new ArrayList(8); - Collections.addAll(rec_1, new Byte("123"), - new Short("456"), - 
new Integer(789), - new Long(1000L), - new Double(5.3D), - new String("hcatalog and hadoop"), - null, - null); - - HCatRecord tup_1 = new DefaultHCatRecord(rec_1); - - List rec_2 = new ArrayList(8); - Collections.addAll(rec_2, new Byte("100"), - new Short("200"), - new Integer(123), - new Long(1000L), - new Double(5.3D), - new String("hcatalog and hadoop"), - null, - null); - HCatRecord tup_2 = new DefaultHCatRecord(rec_2); - - return new HCatRecord[]{tup_1,tup_2}; - } - - private HCatRecord[] getPrunedRecords(){ - List rec_1 = new ArrayList(8); - Collections.addAll(rec_1, new Byte("123"), - new Integer(789), - new Double(5.3D), - new String("hcatalog and hadoop"), - null); - HCatRecord tup_1 = new DefaultHCatRecord(rec_1); - - List rec_2 = new ArrayList(8); - Collections.addAll(rec_2, new Byte("100"), - new Integer(123), - new Double(5.3D), - new String("hcatalog and hadoop"), - null); - HCatRecord tup_2 = new DefaultHCatRecord(rec_2); - - return new HCatRecord[]{tup_1,tup_2}; - } - - private HCatSchema buildHiveSchema() throws HCatException{ - return new HCatSchema(HCatUtil.getHCatFieldSchemaList(new FieldSchema("atinyint", "tinyint", ""), - new FieldSchema("asmallint", "smallint", ""), - new FieldSchema("aint", "int", ""), - new FieldSchema("along", "bigint", ""), - new FieldSchema("adouble", "double", ""), - new FieldSchema("astring", "string", ""), - new FieldSchema("anullint", "int", ""), - new FieldSchema("anullstring", "string", ""))); - } - - private HCatSchema buildPrunedSchema() throws HCatException{ - return new HCatSchema(HCatUtil.getHCatFieldSchemaList(new FieldSchema("atinyint", "tinyint", ""), - new FieldSchema("aint", "int", ""), - new FieldSchema("adouble", "double", ""), - new FieldSchema("astring", "string", ""), - new FieldSchema("anullint", "int", ""))); - } - - private HCatSchema buildReorderedSchema() throws HCatException{ - return new HCatSchema(HCatUtil.getHCatFieldSchemaList(new FieldSchema("aint", "int", ""), - new FieldSchema("part1", 
"string", ""), - new FieldSchema("adouble", "double", ""), - new FieldSchema("newCol", "tinyint", ""), - new FieldSchema("astring", "string", ""), - new FieldSchema("atinyint", "tinyint", ""), - new FieldSchema("anullint", "int", ""))); - } - - private HCatRecord[] getReorderedCols(){ - List rec_1 = new ArrayList(7); - Collections.addAll(rec_1, new Integer(789), - new String("first-part"), - new Double(5.3D), - null, // new column - new String("hcatalog and hadoop"), - new Byte("123"), - null); - HCatRecord tup_1 = new DefaultHCatRecord(rec_1); - - List rec_2 = new ArrayList(7); - Collections.addAll(rec_2, new Integer(123), - new String("first-part"), - new Double(5.3D), - null, - new String("hcatalog and hadoop"), - new Byte("100"), - null); - HCatRecord tup_2 = new DefaultHCatRecord(rec_2); - - return new HCatRecord[]{tup_1,tup_2}; - - } - private Properties getProps(){ - Properties props = new Properties(); - props.setProperty(Constants.SERIALIZATION_NULL_FORMAT, "\\N"); - props.setProperty(Constants.SERIALIZATION_FORMAT, "9"); - return props; - } -} diff --git hcatalog/core/src/test/java/org/apache/hcatalog/rcfile/TestRCFileMapReduceInputFormat.java hcatalog/core/src/test/java/org/apache/hcatalog/rcfile/TestRCFileMapReduceInputFormat.java deleted file mode 100644 index bfa4eca..0000000 --- hcatalog/core/src/test/java/org/apache/hcatalog/rcfile/TestRCFileMapReduceInputFormat.java +++ /dev/null @@ -1,249 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.rcfile; - -import java.io.IOException; -import java.io.UnsupportedEncodingException; -import java.util.List; -import java.util.Properties; - -import junit.framework.TestCase; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.ql.io.RCFile; -import org.apache.hadoop.hive.ql.io.RCFileOutputFormat; -import org.apache.hadoop.hive.serde.serdeConstants; -import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable; -import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable; -import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe; -import org.apache.hadoop.hive.shims.ShimLoader; -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.io.compress.CompressionCodec; -import org.apache.hadoop.mapreduce.InputSplit; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.JobContext; -import org.apache.hadoop.mapreduce.RecordReader; -import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.hadoop.mapreduce.TaskAttemptID; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * TestRCFile. 
- * - */ -public class TestRCFileMapReduceInputFormat extends TestCase { - - private static final Logger LOG = LoggerFactory.getLogger(TestRCFileMapReduceInputFormat.class); - - private static Configuration conf = new Configuration(); - - private static ColumnarSerDe serDe; - - private static Path file; - - private static FileSystem fs; - - private static Properties tbl; - - static { - try { - fs = FileSystem.getLocal(conf); - Path dir = new Path(System.getProperty("test.data.dir", ".") + "/mapred"); - file = new Path(dir, "test_rcfile"); - fs.delete(dir, true); - // the SerDe part is from TestLazySimpleSerDe - serDe = new ColumnarSerDe(); - // Create the SerDe - tbl = createProperties(); - serDe.initialize(conf, tbl); - } catch (Exception e) { - } - } - - private static BytesRefArrayWritable patialS = new BytesRefArrayWritable(); - - private static byte[][] bytesArray = null; - - private static BytesRefArrayWritable s = null; - - static { - try { - bytesArray = new byte[][]{"123".getBytes("UTF-8"), - "456".getBytes("UTF-8"), "789".getBytes("UTF-8"), - "1000".getBytes("UTF-8"), "5.3".getBytes("UTF-8"), - "hive and hadoop".getBytes("UTF-8"), new byte[0], - "NULL".getBytes("UTF-8")}; - s = new BytesRefArrayWritable(bytesArray.length); - s.set(0, new BytesRefWritable("123".getBytes("UTF-8"))); - s.set(1, new BytesRefWritable("456".getBytes("UTF-8"))); - s.set(2, new BytesRefWritable("789".getBytes("UTF-8"))); - s.set(3, new BytesRefWritable("1000".getBytes("UTF-8"))); - s.set(4, new BytesRefWritable("5.3".getBytes("UTF-8"))); - s.set(5, new BytesRefWritable("hive and hadoop".getBytes("UTF-8"))); - s.set(6, new BytesRefWritable("NULL".getBytes("UTF-8"))); - s.set(7, new BytesRefWritable("NULL".getBytes("UTF-8"))); - - // partial test init - patialS.set(0, new BytesRefWritable("NULL".getBytes("UTF-8"))); - patialS.set(1, new BytesRefWritable("NULL".getBytes("UTF-8"))); - patialS.set(2, new BytesRefWritable("789".getBytes("UTF-8"))); - patialS.set(3, new 
BytesRefWritable("1000".getBytes("UTF-8"))); - patialS.set(4, new BytesRefWritable("NULL".getBytes("UTF-8"))); - patialS.set(5, new BytesRefWritable("NULL".getBytes("UTF-8"))); - patialS.set(6, new BytesRefWritable("NULL".getBytes("UTF-8"))); - patialS.set(7, new BytesRefWritable("NULL".getBytes("UTF-8"))); - - } catch (UnsupportedEncodingException e) { - } - } - - - /** For debugging and testing. */ - public static void main(String[] args) throws Exception { - int count = 10000; - boolean create = true; - - String usage = "Usage: RCFile " + "[-count N]" + " file"; - if (args.length == 0) { - LOG.error(usage); - System.exit(-1); - } - - try { - for (int i = 0; i < args.length; ++i) { // parse command line - if (args[i] == null) { - continue; - } else if (args[i].equals("-count")) { - count = Integer.parseInt(args[++i]); - } else { - // file is required parameter - file = new Path(args[i]); - } - } - - if (file == null) { - LOG.error(usage); - System.exit(-1); - } - - LOG.info("count = {}", count); - LOG.info("create = {}", create); - LOG.info("file = {}", file); - - // test.performanceTest(); - LOG.info("Finished."); - } finally { - fs.close(); - } - } - - private static Properties createProperties() { - Properties tbl = new Properties(); - - // Set the configuration parameters - tbl.setProperty(serdeConstants.SERIALIZATION_FORMAT, "9"); - tbl.setProperty("columns", - "abyte,ashort,aint,along,adouble,astring,anullint,anullstring"); - tbl.setProperty("columns.types", - "tinyint:smallint:int:bigint:double:string:int:string"); - tbl.setProperty(serdeConstants.SERIALIZATION_NULL_FORMAT, "NULL"); - return tbl; - } - - - public void testSynAndSplit() throws IOException, InterruptedException { - splitBeforeSync(); - splitRightBeforeSync(); - splitInMiddleOfSync(); - splitRightAfterSync(); - splitAfterSync(); - } - - private void splitBeforeSync() throws IOException, InterruptedException { - writeThenReadByRecordReader(600, 1000, 2, 17684, null); - } - - private void 
splitRightBeforeSync() throws IOException, InterruptedException { - writeThenReadByRecordReader(500, 1000, 2, 17750, null); - } - - private void splitInMiddleOfSync() throws IOException, InterruptedException { - writeThenReadByRecordReader(500, 1000, 2, 17760, null); - - } - - private void splitRightAfterSync() throws IOException, InterruptedException { - writeThenReadByRecordReader(500, 1000, 2, 17770, null); - } - - private void splitAfterSync() throws IOException, InterruptedException { - writeThenReadByRecordReader(500, 1000, 2, 19950, null); - } - - private void writeThenReadByRecordReader(int intervalRecordCount, - int writeCount, int splitNumber, long maxSplitSize, CompressionCodec codec) - throws IOException, InterruptedException { - Path testDir = new Path(System.getProperty("test.data.dir", ".") - + "/mapred/testsmallfirstsplit"); - Path testFile = new Path(testDir, "test_rcfile"); - fs.delete(testFile, true); - Configuration cloneConf = new Configuration(conf); - RCFileOutputFormat.setColumnNumber(cloneConf, bytesArray.length); - cloneConf.setInt(RCFile.RECORD_INTERVAL_CONF_STR, intervalRecordCount); - - RCFile.Writer writer = new RCFile.Writer(fs, cloneConf, testFile, null, codec); - - BytesRefArrayWritable bytes = new BytesRefArrayWritable(bytesArray.length); - for (int i = 0; i < bytesArray.length; i++) { - BytesRefWritable cu = null; - cu = new BytesRefWritable(bytesArray[i], 0, bytesArray[i].length); - bytes.set(i, cu); - } - for (int i = 0; i < writeCount; i++) { - writer.append(bytes); - } - writer.close(); - - RCFileMapReduceInputFormat inputFormat = new RCFileMapReduceInputFormat(); - Configuration jonconf = new Configuration(cloneConf); - jonconf.set("mapred.input.dir", testDir.toString()); - JobContext context = new Job(jonconf); - context.getConfiguration().setLong("mapred.max.split.size", maxSplitSize); - List splits = inputFormat.getSplits(context); - assertEquals("splits length should be " + splitNumber, splits.size(), splitNumber); - int 
readCount = 0; - for (int i = 0; i < splits.size(); i++) { - TaskAttemptContext tac = ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(jonconf, - new TaskAttemptID()); - RecordReader rr = inputFormat.createRecordReader(splits.get(i), tac); - rr.initialize(splits.get(i), tac); - while (rr.nextKeyValue()) { - readCount++; - } - } - assertEquals("readCount should be equal to writeCount", readCount, writeCount); - } - -} - - diff --git hcatalog/core/src/test/java/org/apache/hcatalog/rcfile/TestRCFileOutputStorageDriver.java.broken hcatalog/core/src/test/java/org/apache/hcatalog/rcfile/TestRCFileOutputStorageDriver.java.broken deleted file mode 100644 index 90458cb..0000000 --- hcatalog/core/src/test/java/org/apache/hcatalog/rcfile/TestRCFileOutputStorageDriver.java.broken +++ /dev/null @@ -1,105 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.rcfile; - -import java.io.IOException; -import java.io.UnsupportedEncodingException; -import java.util.ArrayList; -import java.util.List; -import java.util.Properties; - -import junit.framework.TestCase; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable; -import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable; -import org.apache.hadoop.mapreduce.JobContext; -import org.apache.hadoop.mapreduce.JobID; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.common.HCatException; -import org.apache.hcatalog.common.HCatUtil; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.apache.hcatalog.mapreduce.HCatInputStorageDriver; -import org.apache.hcatalog.mapreduce.HCatOutputStorageDriver; -import org.apache.hcatalog.mapreduce.OutputJobInfo; -import org.apache.hcatalog.shims.HCatHadoopShims; - -public class TestRCFileOutputStorageDriver extends TestCase { - - public void testConversion() throws IOException { - Configuration conf = new Configuration(); - JobContext jc = HCatHadoopShims.Instance.get().createJobContext(conf, new JobID()); - String jobString = HCatUtil.serialize(OutputJobInfo.create(null,null,null)); - jc.getConfiguration().set(HCatConstants.HCAT_KEY_OUTPUT_INFO,jobString); - - HCatSchema schema = buildHiveSchema(); - HCatInputStorageDriver isd = new RCFileInputDriver(); - - isd.setOriginalSchema(jc, schema); - isd.setOutputSchema(jc, schema); - isd.initialize(jc, new Properties()); - - byte[][] byteArray = buildBytesArray(); - - BytesRefArrayWritable bytesWritable = new BytesRefArrayWritable(byteArray.length); - for (int i = 0; i < byteArray.length; i++) { - BytesRefWritable cu = new BytesRefWritable(byteArray[i], 0, byteArray[i].length); - bytesWritable.set(i, cu); - } - - //Convert byte array to HCatRecord using 
isd, convert hcatrecord back to byte array - //using osd, compare the two arrays - HCatRecord record = isd.convertToHCatRecord(null, bytesWritable); - - HCatOutputStorageDriver osd = new RCFileOutputDriver(); - - osd.setSchema(jc, schema); - osd.initialize(jc, new Properties()); - - BytesRefArrayWritable bytesWritableOutput = (BytesRefArrayWritable) osd.convertValue(record); - - assertTrue(bytesWritableOutput.compareTo(bytesWritable) == 0); - } - - private byte[][] buildBytesArray() throws UnsupportedEncodingException { - byte[][] bytes = {"123".getBytes("UTF-8"), "456".getBytes("UTF-8"), - "789".getBytes("UTF-8"), "1000".getBytes("UTF-8"), - "5.3".getBytes("UTF-8"), "hcat and hadoop".getBytes("UTF-8"), - new byte[0], "\\N".getBytes("UTF-8") }; - return bytes; - } - - private HCatSchema buildHiveSchema() throws HCatException{ - - List fields = new ArrayList(8); - fields.add(new FieldSchema("atinyint", "tinyint", "")); - fields.add(new FieldSchema("asmallint", "smallint", "")); - fields.add(new FieldSchema("aint", "int", "")); - fields.add(new FieldSchema("along", "bigint", "")); - fields.add(new FieldSchema("adouble", "double", "")); - fields.add(new FieldSchema("astring", "string", "")); - fields.add(new FieldSchema("anullint", "int", "")); - fields.add(new FieldSchema("anullstring", "string", "")); - - return new HCatSchema(HCatUtil.getHCatFieldSchemaList(fields)); - } -} diff --git hcatalog/core/src/test/java/org/apache/hcatalog/security/TestHdfsAuthorizationProvider.java hcatalog/core/src/test/java/org/apache/hcatalog/security/TestHdfsAuthorizationProvider.java deleted file mode 100644 index 6213bfa..0000000 --- hcatalog/core/src/test/java/org/apache/hcatalog/security/TestHdfsAuthorizationProvider.java +++ /dev/null @@ -1,583 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.security; - -import static org.apache.hcatalog.HcatTestUtils.perm300; -import static org.apache.hcatalog.HcatTestUtils.perm500; -import static org.apache.hcatalog.HcatTestUtils.perm555; -import static org.apache.hcatalog.HcatTestUtils.perm700; -import static org.apache.hcatalog.HcatTestUtils.perm755; - -import java.io.IOException; -import java.util.Random; - -import junit.framework.Assert; - -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.cli.CliSessionState; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; -import org.apache.hadoop.hive.metastore.Warehouse; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.ql.metadata.Hive; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.metadata.Table; -import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; -import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hadoop.hive.shims.ShimLoader; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hcatalog.HcatTestUtils; -import org.apache.hcatalog.cli.HCatDriver; -import org.apache.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzer; -import org.junit.After; 
-import org.junit.Before; -import org.junit.Test; - -public class TestHdfsAuthorizationProvider { - - protected HCatDriver hcatDriver; - protected HiveMetaStoreClient msc; - protected HiveConf conf; - protected String whDir; - protected Path whPath; - protected FileSystem whFs; - protected Warehouse wh; - protected Hive hive; - - @Before - public void setUp() throws Exception { - - conf = new HiveConf(this.getClass()); - conf.set(ConfVars.PREEXECHOOKS.varname, ""); - conf.set(ConfVars.POSTEXECHOOKS.varname, ""); - conf.set(ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); - - conf.set("hive.metastore.local", "true"); - conf.set(ConfVars.SEMANTIC_ANALYZER_HOOK.varname, HCatSemanticAnalyzer.class.getName()); - conf.setBoolVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED, true); - conf.setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER, - StorageDelegationAuthorizationProvider.class.getCanonicalName()); - conf.set("fs.pfile.impl", "org.apache.hadoop.fs.ProxyLocalFileSystem"); - - whDir = System.getProperty("test.warehouse.dir", "/tmp/testhdfsauthorization_wh"); - conf.setVar(HiveConf.ConfVars.METASTOREWAREHOUSE, whDir); - - UserGroupInformation ugi = ShimLoader.getHadoopShims().getUGIForConf(conf); - String username = ShimLoader.getHadoopShims().getShortUserName(ugi); - - whPath = new Path(whDir); - whFs = whPath.getFileSystem(conf); - - wh = new Warehouse(conf); - hive = Hive.get(conf); - - //clean up mess in HMS - HcatTestUtils.cleanupHMS(hive, wh, perm700); - - whFs.delete(whPath, true); - whFs.mkdirs(whPath, perm755); - - SessionState.start(new CliSessionState(conf)); - hcatDriver = new HCatDriver(); - } - - @After - public void tearDown() throws IOException { - whFs.close(); - hcatDriver.close(); - Hive.closeCurrent(); - } - - public Path getDbPath(String dbName) throws MetaException, HiveException { - return HcatTestUtils.getDbPath(hive, wh, dbName); - } - - public Path getTablePath(String dbName, String tableName) throws HiveException { - Table table = 
hive.getTable(dbName, tableName); - return table.getPath(); - } - - public Path getPartPath(String partName, String dbName, String tableName) throws HiveException { - return new Path(getTablePath(dbName, tableName), partName); - } - - /** Execute the query expecting success*/ - public void exec(String format, Object... args) throws Exception { - String command = String.format(format, args); - CommandProcessorResponse resp = hcatDriver.run(command); - Assert.assertEquals(resp.getErrorMessage(), 0, resp.getResponseCode()); - Assert.assertEquals(resp.getErrorMessage(), null, resp.getErrorMessage()); - } - - /** Execute the query expecting it to fail with AuthorizationException */ - public void execFail(String format, Object... args) throws Exception { - String command = String.format(format, args); - CommandProcessorResponse resp = hcatDriver.run(command); - Assert.assertNotSame(resp.getErrorMessage(), 0, resp.getResponseCode()); - Assert.assertTrue((resp.getResponseCode() == 40000) || (resp.getResponseCode() == 403)); - if (resp.getErrorMessage() != null) { - Assert.assertTrue(resp.getErrorMessage().contains("org.apache.hadoop.security.AccessControlException")); - } - } - - - /** - * Tests whether the warehouse directory is writable by the current user (as defined by Hadoop) - */ - @Test - public void testWarehouseIsWritable() throws Exception { - Path top = new Path(whPath, "_foobarbaz12_"); - try { - whFs.mkdirs(top); - } finally { - whFs.delete(top, true); - } - } - - @Test - public void testShowDatabases() throws Exception { - exec("CREATE DATABASE doo"); - exec("SHOW DATABASES"); - - whFs.setPermission(whPath, perm300); //revoke r - execFail("SHOW DATABASES"); - } - - @Test - public void testDatabaseOps() throws Exception { - exec("SHOW TABLES"); - exec("SHOW TABLE EXTENDED LIKE foo1"); - - whFs.setPermission(whPath, perm700); - exec("CREATE DATABASE doo"); - exec("DESCRIBE DATABASE doo"); - exec("USE doo"); - exec("SHOW TABLES"); - exec("SHOW TABLE EXTENDED 
LIKE foo1"); - exec("DROP DATABASE doo"); - - //custom location - Path dbPath = new Path(whPath, new Random().nextInt() + "/mydb"); - whFs.mkdirs(dbPath, perm700); - exec("CREATE DATABASE doo2 LOCATION '%s'", dbPath.toUri()); - exec("DESCRIBE DATABASE doo2", dbPath.toUri()); - exec("USE doo2"); - exec("SHOW TABLES"); - exec("SHOW TABLE EXTENDED LIKE foo1"); - exec("DROP DATABASE doo2", dbPath.toUri()); - - //custom non-existing location - exec("CREATE DATABASE doo3 LOCATION '%s/subpath'", dbPath.toUri()); - } - - @Test - public void testCreateDatabaseFail1() throws Exception { - whFs.setPermission(whPath, perm500); - execFail("CREATE DATABASE doo"); //in the default location - - whFs.setPermission(whPath, perm555); - execFail("CREATE DATABASE doo2"); - } - - @Test - public void testCreateDatabaseFail2() throws Exception { - //custom location - Path dbPath = new Path(whPath, new Random().nextInt() + "/mydb"); - - whFs.mkdirs(dbPath, perm700); - whFs.setPermission(dbPath, perm500); - execFail("CREATE DATABASE doo2 LOCATION '%s'", dbPath.toUri()); - } - - @Test - public void testDropDatabaseFail1() throws Exception { - whFs.setPermission(whPath, perm700); - exec("CREATE DATABASE doo"); //in the default location - - whFs.setPermission(getDbPath("doo"), perm500); //revoke write - execFail("DROP DATABASE doo"); - } - - @Test - public void testDropDatabaseFail2() throws Exception { - //custom location - Path dbPath = new Path(whPath, new Random().nextInt() + "/mydb"); - - whFs.mkdirs(dbPath, perm700); - exec("CREATE DATABASE doo2 LOCATION '%s'", dbPath.toUri()); - - whFs.setPermission(dbPath, perm500); - execFail("DROP DATABASE doo2"); - } - - @Test - public void testDescSwitchDatabaseFail() throws Exception { - whFs.setPermission(whPath, perm700); - exec("CREATE DATABASE doo"); - whFs.setPermission(getDbPath("doo"), perm300); //revoke read - execFail("DESCRIBE DATABASE doo"); - execFail("USE doo"); - - //custom location - Path dbPath = new Path(whPath, new 
Random().nextInt() + "/mydb"); - whFs.mkdirs(dbPath, perm700); - exec("CREATE DATABASE doo2 LOCATION '%s'", dbPath.toUri()); - whFs.mkdirs(dbPath, perm300); //revoke read - execFail("DESCRIBE DATABASE doo2", dbPath.toUri()); - execFail("USE doo2"); - } - - @Test - public void testShowTablesFail() throws Exception { - whFs.setPermission(whPath, perm700); - exec("CREATE DATABASE doo"); - exec("USE doo"); - whFs.setPermission(getDbPath("doo"), perm300); //revoke read - execFail("SHOW TABLES"); - execFail("SHOW TABLE EXTENDED LIKE foo1"); - } - - @Test - public void testTableOps() throws Exception { - //default db - exec("CREATE TABLE foo1 (foo INT) STORED AS RCFILE"); - exec("DESCRIBE foo1"); - exec("DROP TABLE foo1"); - - //default db custom location - Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); - whFs.mkdirs(tablePath, perm700); - exec("CREATE EXTERNAL TABLE foo2 (foo INT) STORED AS RCFILE LOCATION '%s'", tablePath); - exec("DESCRIBE foo2"); - exec("DROP TABLE foo2"); - - //default db custom non existing location - exec("CREATE EXTERNAL TABLE foo3 (foo INT) STORED AS RCFILE LOCATION '%s/subpath'", tablePath); - exec("DESCRIBE foo3"); - exec("DROP TABLE foo3"); - - //non default db - exec("CREATE DATABASE doo"); - exec("USE doo"); - - exec("CREATE TABLE foo4 (foo INT) STORED AS RCFILE"); - exec("DESCRIBE foo4"); - exec("DROP TABLE foo4"); - - //non-default db custom location - tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); - whFs.mkdirs(tablePath, perm700); - exec("CREATE EXTERNAL TABLE foo5 (foo INT) STORED AS RCFILE LOCATION '%s'", tablePath); - exec("DESCRIBE foo5"); - exec("DROP TABLE foo5"); - - //non-default db custom non existing location - exec("CREATE EXTERNAL TABLE foo6 (foo INT) STORED AS RCFILE LOCATION '%s/subpath'", tablePath); - exec("DESCRIBE foo6"); - exec("DROP TABLE foo6"); - - exec("DROP TABLE IF EXISTS foo_non_exists"); - - exec("CREATE TABLE foo1 (foo INT) STORED AS RCFILE"); - exec("DESCRIBE 
EXTENDED foo1"); - exec("DESCRIBE FORMATTED foo1"); - exec("DESCRIBE foo1.foo"); - - //deep non-existing path for the table - tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); - whFs.mkdirs(tablePath, perm700); - exec("CREATE EXTERNAL TABLE foo2 (foo INT) STORED AS RCFILE LOCATION '%s/a/a/a/'", tablePath); - } - - @Test - public void testCreateTableFail1() throws Exception { - //default db - whFs.mkdirs(whPath, perm500); //revoke w - execFail("CREATE TABLE foo1 (foo INT) STORED AS RCFILE"); - } - - @Test - public void testCreateTableFail2() throws Exception { - //default db custom location - Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); - whFs.mkdirs(tablePath, perm500); - execFail("CREATE EXTERNAL TABLE foo2 (foo INT) STORED AS RCFILE LOCATION '%s'", tablePath); - - //default db custom non existing location - execFail("CREATE EXTERNAL TABLE foo3 (foo INT) STORED AS RCFILE LOCATION '%s/subpath'", tablePath); - } - - @Test - public void testCreateTableFail3() throws Exception { - //non default db - exec("CREATE DATABASE doo"); - whFs.setPermission(getDbPath("doo"), perm500); - - execFail("CREATE TABLE doo.foo4 (foo INT) STORED AS RCFILE"); - - //non-default db custom location, permission to write to tablePath, but not on db path - Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); - whFs.mkdirs(tablePath, perm700); - exec("USE doo"); - execFail("CREATE EXTERNAL TABLE foo5 (foo INT) STORED AS RCFILE LOCATION '%s'", tablePath); - } - - @Test - public void testCreateTableFail4() throws Exception { - //non default db - exec("CREATE DATABASE doo"); - - //non-default db custom location - Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); - whFs.mkdirs(tablePath, perm500); - execFail("CREATE EXTERNAL TABLE doo.foo5 (foo INT) STORED AS RCFILE LOCATION '%s'", tablePath); - - //non-default db custom non existing location - execFail("CREATE EXTERNAL TABLE doo.foo6 (foo INT) STORED AS RCFILE 
LOCATION '%s/a/a/a/'", tablePath); - } - - @Test - public void testDropTableFail1() throws Exception { - //default db - exec("CREATE TABLE foo1 (foo INT) STORED AS RCFILE"); - whFs.mkdirs(getTablePath("default", "foo1"), perm500); //revoke w - execFail("DROP TABLE foo1"); - } - - @Test - public void testDropTableFail2() throws Exception { - //default db custom location - Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); - exec("CREATE EXTERNAL TABLE foo2 (foo INT) STORED AS RCFILE LOCATION '%s'", tablePath); - whFs.mkdirs(tablePath, perm500); - execFail("DROP TABLE foo2"); - } - - @Test - public void testDropTableFail4() throws Exception { - //non default db - exec("CREATE DATABASE doo"); - - //non-default db custom location - Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); - - exec("CREATE EXTERNAL TABLE doo.foo5 (foo INT) STORED AS RCFILE LOCATION '%s'", tablePath); - whFs.mkdirs(tablePath, perm500); - exec("USE doo"); //There is no DROP TABLE doo.foo5 support in Hive - execFail("DROP TABLE foo5"); - } - - @Test - public void testDescTableFail() throws Exception { - //default db - exec("CREATE TABLE foo1 (foo INT) STORED AS RCFILE"); - whFs.mkdirs(getTablePath("default", "foo1"), perm300); //revoke read - execFail("DESCRIBE foo1"); - - //default db custom location - Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); - whFs.mkdirs(tablePath, perm700); - exec("CREATE EXTERNAL TABLE foo2 (foo INT) STORED AS RCFILE LOCATION '%s'", tablePath); - whFs.mkdirs(tablePath, perm300); //revoke read - execFail("DESCRIBE foo2"); - } - - @Test - public void testAlterTableRename() throws Exception { - exec("CREATE TABLE foo1 (foo INT) STORED AS RCFILE"); - exec("ALTER TABLE foo1 RENAME TO foo2"); - - Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); - exec("CREATE EXTERNAL TABLE foo3 (foo INT) STORED AS RCFILE LOCATION '%s'", tablePath); - exec("ALTER TABLE foo3 RENAME TO foo4"); - } - - 
@Test - public void testAlterTableRenameFail() throws Exception { - exec("CREATE TABLE foo1 (foo INT) STORED AS RCFILE"); - whFs.mkdirs(getTablePath("default", "foo1"), perm500); //revoke write - execFail("ALTER TABLE foo1 RENAME TO foo2"); - - Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); - exec("CREATE EXTERNAL TABLE foo3 (foo INT) STORED AS RCFILE LOCATION '%s'", tablePath); - whFs.mkdirs(tablePath, perm500); //revoke write - execFail("ALTER TABLE foo3 RENAME TO foo4"); - } - - @Test - public void testAlterTableRelocate() throws Exception { - exec("CREATE TABLE foo1 (foo INT) STORED AS RCFILE"); - Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); - exec("ALTER TABLE foo1 SET LOCATION '%s'", tablePath.makeQualified(whFs)); - - tablePath = new Path(whPath, new Random().nextInt() + "/mytable2"); - exec("CREATE EXTERNAL TABLE foo3 (foo INT) STORED AS RCFILE LOCATION '%s'", - tablePath.makeQualified(whFs)); - tablePath = new Path(whPath, new Random().nextInt() + "/mytable2"); - exec("ALTER TABLE foo3 SET LOCATION '%s'", tablePath.makeQualified(whFs)); - } - - @Test - public void testAlterTableRelocateFail() throws Exception { - exec("CREATE TABLE foo1 (foo INT) STORED AS RCFILE"); - Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); - whFs.mkdirs(tablePath, perm500); //revoke write - execFail("ALTER TABLE foo1 SET LOCATION '%s'", tablePath.makeQualified(whFs)); - - //dont have access to new table loc - tablePath = new Path(whPath, new Random().nextInt() + "/mytable2"); - exec("CREATE EXTERNAL TABLE foo3 (foo INT) STORED AS RCFILE LOCATION '%s'", - tablePath.makeQualified(whFs)); - tablePath = new Path(whPath, new Random().nextInt() + "/mytable2"); - whFs.mkdirs(tablePath, perm500); //revoke write - execFail("ALTER TABLE foo3 SET LOCATION '%s'", tablePath.makeQualified(whFs)); - - //have access to new table loc, but not old table loc - tablePath = new Path(whPath, new Random().nextInt() + 
"/mytable3"); - exec("CREATE EXTERNAL TABLE foo4 (foo INT) STORED AS RCFILE LOCATION '%s'", - tablePath.makeQualified(whFs)); - whFs.mkdirs(tablePath, perm500); //revoke write - tablePath = new Path(whPath, new Random().nextInt() + "/mytable3"); - execFail("ALTER TABLE foo4 SET LOCATION '%s'", tablePath.makeQualified(whFs)); - } - - @Test - public void testAlterTable() throws Exception { - exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (b STRING) STORED AS TEXTFILE"); - exec("ALTER TABLE foo1 SET TBLPROPERTIES ('foo'='bar')"); - exec("ALTER TABLE foo1 SET SERDEPROPERTIES ('foo'='bar')"); - exec("ALTER TABLE foo1 ADD COLUMNS (foo2 INT)"); - } - - @Test - public void testAddDropPartition() throws Exception { - exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (b STRING) STORED AS TEXTFILE"); - exec("ALTER TABLE foo1 ADD PARTITION (b='2010-10-10')"); - exec("ALTER TABLE foo1 ADD IF NOT EXISTS PARTITION (b='2010-10-10')"); - String relPath = new Random().nextInt() + "/mypart"; - exec("ALTER TABLE foo1 ADD PARTITION (b='2010-10-11') LOCATION '%s'", relPath); - - exec("ALTER TABLE foo1 PARTITION (b='2010-10-10') SET FILEFORMAT RCFILE"); - - exec("ALTER TABLE foo1 PARTITION (b='2010-10-10') SET FILEFORMAT INPUTFORMAT " - + "'org.apache.hadoop.hive.ql.io.RCFileInputFormat' OUTPUTFORMAT " - + "'org.apache.hadoop.hive.ql.io.RCFileOutputFormat' inputdriver " - + "'mydriver' outputdriver 'yourdriver'"); - - exec("ALTER TABLE foo1 DROP PARTITION (b='2010-10-10')"); - exec("ALTER TABLE foo1 DROP PARTITION (b='2010-10-11')"); - } - - @Test - public void testAddPartitionFail1() throws Exception { - exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (b STRING) STORED AS TEXTFILE"); - whFs.mkdirs(getTablePath("default", "foo1"), perm500); - execFail("ALTER TABLE foo1 ADD PARTITION (b='2010-10-10')"); - } - - @Test - public void testAddPartitionFail2() throws Exception { - exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (b STRING) STORED AS TEXTFILE"); - String relPath = new 
Random().nextInt() + "/mypart"; - Path partPath = new Path(getTablePath("default", "foo1"), relPath); - whFs.mkdirs(partPath, perm500); - exec("ALTER TABLE foo1 ADD PARTITION (b='2010-10-10') LOCATION '%s'", partPath); - } - - @Test - public void testDropPartitionFail1() throws Exception { - exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (b STRING) STORED AS TEXTFILE"); - exec("ALTER TABLE foo1 ADD PARTITION (b='2010-10-10')"); - whFs.mkdirs(getPartPath("b=2010-10-10", "default", "foo1"), perm500); - execFail("ALTER TABLE foo1 DROP PARTITION (b='2010-10-10')"); - } - - @Test - public void testDropPartitionFail2() throws Exception { - exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (b STRING) STORED AS TEXTFILE"); - String relPath = new Random().nextInt() + "/mypart"; - Path partPath = new Path(getTablePath("default", "foo1"), relPath); - whFs.mkdirs(partPath, perm700); - exec("ALTER TABLE foo1 ADD PARTITION (b='2010-10-10') LOCATION '%s'", partPath); - whFs.mkdirs(partPath, perm500); //revoke write - execFail("ALTER TABLE foo1 DROP PARTITION (b='2010-10-10')"); - } - - @Test - public void testAlterTableFail() throws Exception { - exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (boo STRING) STORED AS TEXTFILE"); - whFs.mkdirs(getTablePath("default", "foo1"), perm500); //revoke write - execFail("ALTER TABLE foo1 SET TBLPROPERTIES ('foo'='bar')"); - execFail("ALTER TABLE foo1 SET SERDEPROPERTIES ('foo'='bar')"); - execFail("ALTER TABLE foo1 ADD COLUMNS (foo2 INT)"); - } - - @Test - public void testShowTables() throws Exception { - exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (boo STRING) STORED AS TEXTFILE"); - exec("SHOW PARTITIONS foo1"); - - whFs.mkdirs(getTablePath("default", "foo1"), perm300); //revoke read - execFail("SHOW PARTITIONS foo1"); - } - - @Test - public void testAlterTablePartRename() throws Exception { - exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (b STRING) STORED AS RCFILE"); - Path loc = new Path(whPath, new Random().nextInt() + 
"/mypart"); - exec("ALTER TABLE foo1 ADD PARTITION (b='2010-10-16') LOCATION '%s'", loc); - exec("ALTER TABLE foo1 PARTITION (b='2010-10-16') RENAME TO PARTITION (b='2010-10-17')"); - } - - @Test - public void testAlterTablePartRenameFail() throws Exception { - exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (b STRING) STORED AS RCFILE"); - Path loc = new Path(whPath, new Random().nextInt() + "/mypart"); - exec("ALTER TABLE foo1 ADD PARTITION (b='2010-10-16') LOCATION '%s'", loc); - whFs.setPermission(loc, perm500); //revoke w - execFail("ALTER TABLE foo1 PARTITION (b='2010-10-16') RENAME TO PARTITION (b='2010-10-17')"); - } - - @Test - public void testAlterTablePartRelocate() throws Exception { - exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (b STRING) STORED AS RCFILE"); - exec("ALTER TABLE foo1 ADD PARTITION (b='2010-10-16')"); - Path partPath = new Path(whPath, new Random().nextInt() + "/mypart"); - exec("ALTER TABLE foo1 PARTITION (b='2010-10-16') SET LOCATION '%s'", partPath.makeQualified(whFs)); - } - - @Test - public void testAlterTablePartRelocateFail() throws Exception { - exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (b STRING) STORED AS RCFILE"); - - Path oldLoc = new Path(whPath, new Random().nextInt() + "/mypart"); - Path newLoc = new Path(whPath, new Random().nextInt() + "/mypart2"); - - exec("ALTER TABLE foo1 ADD PARTITION (b='2010-10-16') LOCATION '%s'", oldLoc); - whFs.mkdirs(oldLoc, perm500); - execFail("ALTER TABLE foo1 PARTITION (b='2010-10-16') SET LOCATION '%s'", newLoc.makeQualified(whFs)); - whFs.mkdirs(oldLoc, perm700); - whFs.mkdirs(newLoc, perm500); - execFail("ALTER TABLE foo1 PARTITION (b='2010-10-16') SET LOCATION '%s'", newLoc.makeQualified(whFs)); - } - -} diff --git hcatalog/core/src/test/java/org/apache/hive/hcatalog/ExitException.java hcatalog/core/src/test/java/org/apache/hive/hcatalog/ExitException.java new file mode 100644 index 0000000..4e3226f --- /dev/null +++ 
hcatalog/core/src/test/java/org/apache/hive/hcatalog/ExitException.java @@ -0,0 +1,38 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog; + +public class ExitException extends SecurityException { + private static final long serialVersionUID = -1982617086752946683L; + private final int status; + + /** + * @return the status + */ + public int getStatus() { + return status; + } + + public ExitException(int status) { + + super("Raising exception, instead of System.exit(). Return code was: " + status); + this.status = status; + } +} diff --git hcatalog/core/src/test/java/org/apache/hive/hcatalog/HcatTestUtils.java hcatalog/core/src/test/java/org/apache/hive/hcatalog/HcatTestUtils.java new file mode 100644 index 0000000..1168936 --- /dev/null +++ hcatalog/core/src/test/java/org/apache/hive/hcatalog/HcatTestUtils.java @@ -0,0 +1,100 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; + +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.hive.metastore.Warehouse; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; +import org.apache.hadoop.hive.ql.metadata.Hive; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Utility methods for tests + */ +public class HcatTestUtils { + private static final Logger LOG = LoggerFactory.getLogger(HcatTestUtils.class); + + public static FsPermission perm007 = FsPermission.createImmutable((short) 0007); // -------rwx + public static FsPermission perm070 = FsPermission.createImmutable((short) 0070); // ----rwx--- + public static FsPermission perm700 = FsPermission.createImmutable((short) 0700); // -rwx------ + public static FsPermission perm755 = FsPermission.createImmutable((short) 0755); // -rwxr-xr-x + public static FsPermission perm777 = FsPermission.createImmutable((short) 0777); // -rwxrwxrwx + public static FsPermission perm300 = FsPermission.createImmutable((short) 0300); // --wx------ + public static FsPermission perm500 = 
FsPermission.createImmutable((short) 0500); // -r-x------ + public static FsPermission perm555 = FsPermission.createImmutable((short) 0555); // -r-xr-xr-x + + /** + * Returns the database path. + */ + public static Path getDbPath(Hive hive, Warehouse wh, String dbName) throws MetaException, HiveException { + return wh.getDatabasePath(hive.getDatabase(dbName)); + } + + /** + * Removes all databases and tables from the metastore + */ + public static void cleanupHMS(Hive hive, Warehouse wh, FsPermission defaultPerm) + throws HiveException, MetaException, NoSuchObjectException { + for (String dbName : hive.getAllDatabases()) { + if (dbName.equals("default")) { + continue; + } + try { + Path path = getDbPath(hive, wh, dbName); + FileSystem whFs = path.getFileSystem(hive.getConf()); + whFs.setPermission(path, defaultPerm); + } catch (IOException ex) { + //ignore + } + hive.dropDatabase(dbName, true, true, true); + } + + //clean tables in default db + for (String tablename : hive.getAllTables("default")) { + hive.dropTable("default", tablename, true, true); + } + } + + public static void createTestDataFile(String filename, String[] lines) throws IOException { + FileWriter writer = null; + try { + File file = new File(filename); + file.deleteOnExit(); + writer = new FileWriter(file); + for (String line : lines) { + writer.write(line + "\n"); + } + } finally { + if (writer != null) { + writer.close(); + } + } + + } +} diff --git hcatalog/core/src/test/java/org/apache/hive/hcatalog/MiniCluster.java hcatalog/core/src/test/java/org/apache/hive/hcatalog/MiniCluster.java new file mode 100644 index 0000000..3d38c45 --- /dev/null +++ hcatalog/core/src/test/java/org/apache/hive/hcatalog/MiniCluster.java @@ -0,0 +1,201 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.io.PrintWriter; +import java.util.Iterator; +import java.util.Map; +import java.util.Properties; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.MiniMRCluster; + +/** + * This class builds a single instance of itself with the Singleton + * design pattern. While building the single instance, it sets up a + * mini cluster that actually consists of a mini DFS cluster and a + * mini MapReduce cluster on the local machine and also sets up the + * environment for Pig to run on top of the mini cluster. 
+ */ +public class MiniCluster { + private MiniDFSCluster m_dfs = null; + private MiniMRCluster m_mr = null; + private FileSystem m_fileSys = null; + private JobConf m_conf = null; + + private final static MiniCluster INSTANCE = new MiniCluster(); + private static boolean isSetup = true; + + private MiniCluster() { + setupMiniDfsAndMrClusters(); + } + + private void setupMiniDfsAndMrClusters() { + try { + final int dataNodes = 1; // There will be 4 data nodes + final int taskTrackers = 1; // There will be 4 task tracker nodes + Configuration config = new Configuration(); + + // Builds and starts the mini dfs and mapreduce clusters + System.setProperty("hadoop.log.dir", "."); + m_dfs = new MiniDFSCluster(config, dataNodes, true, null); + + m_fileSys = m_dfs.getFileSystem(); + m_mr = new MiniMRCluster(taskTrackers, m_fileSys.getUri().toString(), 1); + + // Create the configuration hadoop-site.xml file + File conf_dir = new File(System.getProperty("user.home"), "pigtest/conf/"); + conf_dir.mkdirs(); + File conf_file = new File(conf_dir, "hadoop-site.xml"); + + // Write the necessary config info to hadoop-site.xml + m_conf = m_mr.createJobConf(); + m_conf.setInt("mapred.submit.replication", 1); + m_conf.set("dfs.datanode.address", "0.0.0.0:0"); + m_conf.set("dfs.datanode.http.address", "0.0.0.0:0"); + m_conf.writeXml(new FileOutputStream(conf_file)); + + // Set the system properties needed by Pig + System.setProperty("cluster", m_conf.get("mapred.job.tracker")); + System.setProperty("namenode", m_conf.get("fs.default.name")); + System.setProperty("junit.hadoop.conf", conf_dir.getPath()); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + /** + * Returns the single instance of class MiniClusterBuilder that + * represents the resouces for a mini dfs cluster and a mini + * mapreduce cluster. 
+ */ + public static MiniCluster buildCluster() { + if (!isSetup) { + INSTANCE.setupMiniDfsAndMrClusters(); + isSetup = true; + } + return INSTANCE; + } + + public void shutDown() { + INSTANCE.shutdownMiniDfsAndMrClusters(); + } + + @Override + protected void finalize() { + shutdownMiniDfsAndMrClusters(); + } + + private void shutdownMiniDfsAndMrClusters() { + isSetup = false; + try { + if (m_fileSys != null) { + m_fileSys.close(); + } + } catch (IOException e) { + e.printStackTrace(); + } + if (m_dfs != null) { + m_dfs.shutdown(); + } + if (m_mr != null) { + m_mr.shutdown(); + } + m_fileSys = null; + m_dfs = null; + m_mr = null; + } + + public Properties getProperties() { + errorIfNotSetup(); + Properties properties = new Properties(); + assert m_conf != null; + Iterator> iter = m_conf.iterator(); + while (iter.hasNext()) { + Map.Entry entry = iter.next(); + properties.put(entry.getKey(), entry.getValue()); + } + return properties; + } + + public void setProperty(String name, String value) { + errorIfNotSetup(); + m_conf.set(name, value); + } + + public FileSystem getFileSystem() { + errorIfNotSetup(); + return m_fileSys; + } + + /** + * Throw RunTimeException if isSetup is false + */ + private void errorIfNotSetup() { + if (isSetup) { + return; + } + String msg = "function called on MiniCluster that has been shutdown"; + throw new RuntimeException(msg); + } + + static public void createInputFile(MiniCluster miniCluster, String fileName, + String[] inputData) + throws IOException { + FileSystem fs = miniCluster.getFileSystem(); + createInputFile(fs, fileName, inputData); + } + + static public void createInputFile(FileSystem fs, String fileName, + String[] inputData) throws IOException { + Path path = new Path(fileName); + if (fs.exists(path)) { + throw new IOException("File " + fileName + " already exists on the minicluster"); + } + FSDataOutputStream stream = fs.create(path); + PrintWriter pw = new PrintWriter(new OutputStreamWriter(stream, "UTF-8")); + for (int 
i = 0; i < inputData.length; i++) { + pw.println(inputData[i]); + } + pw.close(); + + } + + /** + * Helper to remove a dfs file from the minicluster DFS + * + * @param miniCluster reference to the Minicluster where the file should be deleted + * @param fileName pathname of the file to be deleted + * @throws IOException + */ + static public void deleteFile(MiniCluster miniCluster, String fileName) + throws IOException { + FileSystem fs = miniCluster.getFileSystem(); + fs.delete(new Path(fileName), true); + } +} diff --git hcatalog/core/src/test/java/org/apache/hive/hcatalog/NoExitSecurityManager.java hcatalog/core/src/test/java/org/apache/hive/hcatalog/NoExitSecurityManager.java new file mode 100644 index 0000000..8fc674b --- /dev/null +++ hcatalog/core/src/test/java/org/apache/hive/hcatalog/NoExitSecurityManager.java @@ -0,0 +1,42 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog; + +import java.security.Permission; + +public class NoExitSecurityManager extends SecurityManager { + + @Override + public void checkPermission(Permission perm) { + // allow anything. + } + + @Override + public void checkPermission(Permission perm, Object context) { + // allow anything. 
+ } + + @Override + public void checkExit(int status) { + + super.checkExit(status); + throw new ExitException(status); + } +} diff --git hcatalog/core/src/test/java/org/apache/hive/hcatalog/cli/DummyStorageHandler.java hcatalog/core/src/test/java/org/apache/hive/hcatalog/cli/DummyStorageHandler.java new file mode 100644 index 0000000..48f90a5 --- /dev/null +++ hcatalog/core/src/test/java/org/apache/hive/hcatalog/cli/DummyStorageHandler.java @@ -0,0 +1,289 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.hive.hcatalog.cli; + +import java.io.IOException; +import java.util.List; +import java.util.Map; +import java.util.Properties; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.metastore.HiveMetaHook; +import org.apache.hadoop.hive.metastore.api.Database; +import org.apache.hadoop.hive.ql.io.HiveOutputFormat; +import org.apache.hadoop.hive.ql.metadata.AuthorizationException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.Partition; +import org.apache.hadoop.hive.ql.plan.TableDesc; +import org.apache.hadoop.hive.ql.security.HiveAuthenticationProvider; +import org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider; +import org.apache.hadoop.hive.ql.security.authorization.Privilege; +import org.apache.hadoop.hive.serde2.SerDe; +import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.io.WritableComparable; +import org.apache.hadoop.mapred.InputFormat; +import org.apache.hadoop.mapred.InputSplit; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.OutputFormat; +import org.apache.hadoop.mapred.RecordReader; +import org.apache.hadoop.mapred.RecordWriter; +import org.apache.hadoop.mapred.Reporter; +import org.apache.hadoop.util.Progressable; +import org.apache.hive.hcatalog.data.HCatRecord; +import org.apache.hive.hcatalog.mapreduce.HCatStorageHandler; + +class DummyStorageHandler extends HCatStorageHandler { + + @Override + public Configuration getConf() { + return null; + } + + @Override + public void setConf(Configuration conf) { + } + + @Override + public Class getInputFormatClass() { + return DummyInputFormat.class; + } + + @Override + public Class getOutputFormatClass() { + return DummyOutputFormat.class; + } + + @Override + public Class getSerDeClass() { + return 
ColumnarSerDe.class; + } + + @Override + public HiveMetaHook getMetaHook() { + return null; + } + + @Override + public void configureInputJobProperties(TableDesc tableDesc, Map jobProperties) { + } + + @Override + public void configureOutputJobProperties(TableDesc tableDesc, Map jobProperties) { + } + + @Override + public HiveAuthorizationProvider getAuthorizationProvider() + throws HiveException { + return new DummyAuthProvider(); + } + + private class DummyAuthProvider implements HiveAuthorizationProvider { + + @Override + public Configuration getConf() { + return null; + } + + /* @param conf + * @see org.apache.hadoop.conf.Configurable#setConf(org.apache.hadoop.conf.Configuration) + */ + @Override + public void setConf(Configuration conf) { + } + + /* @param conf + /* @throws HiveException + * @see org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider#init(org.apache.hadoop.conf.Configuration) + */ + @Override + public void init(Configuration conf) throws HiveException { + } + + /* @return HiveAuthenticationProvider + * @see org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider#getAuthenticator() + */ + @Override + public HiveAuthenticationProvider getAuthenticator() { + return null; + } + + /* @param authenticator + * @see org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider#setAuthenticator(org.apache.hadoop.hive.ql.security.HiveAuthenticationProvider) + */ + @Override + public void setAuthenticator(HiveAuthenticationProvider authenticator) { + } + + /* @param readRequiredPriv + /* @param writeRequiredPriv + /* @throws HiveException + /* @throws AuthorizationException + * @see org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider#authorize(org.apache.hadoop.hive.ql.security.authorization.Privilege[], org.apache.hadoop.hive.ql.security.authorization.Privilege[]) + */ + @Override + public void authorize(Privilege[] readRequiredPriv, + Privilege[] writeRequiredPriv) throws 
HiveException, + AuthorizationException { + } + + /* @param db + /* @param readRequiredPriv + /* @param writeRequiredPriv + /* @throws HiveException + /* @throws AuthorizationException + * @see org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider#authorize(org.apache.hadoop.hive.metastore.api.Database, org.apache.hadoop.hive.ql.security.authorization.Privilege[], org.apache.hadoop.hive.ql.security.authorization.Privilege[]) + */ + @Override + public void authorize(Database db, Privilege[] readRequiredPriv, + Privilege[] writeRequiredPriv) throws HiveException, + AuthorizationException { + } + + /* @param table + /* @param readRequiredPriv + /* @param writeRequiredPriv + /* @throws HiveException + /* @throws AuthorizationException + * @see org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider#authorize(org.apache.hadoop.hive.ql.metadata.Table, org.apache.hadoop.hive.ql.security.authorization.Privilege[], org.apache.hadoop.hive.ql.security.authorization.Privilege[]) + */ + @Override + public void authorize(org.apache.hadoop.hive.ql.metadata.Table table, Privilege[] readRequiredPriv, + Privilege[] writeRequiredPriv) throws HiveException, + AuthorizationException { + } + + /* @param part + /* @param readRequiredPriv + /* @param writeRequiredPriv + /* @throws HiveException + /* @throws AuthorizationException + * @see org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider#authorize(org.apache.hadoop.hive.ql.metadata.Partition, org.apache.hadoop.hive.ql.security.authorization.Privilege[], org.apache.hadoop.hive.ql.security.authorization.Privilege[]) + */ + @Override + public void authorize(Partition part, Privilege[] readRequiredPriv, + Privilege[] writeRequiredPriv) throws HiveException, + AuthorizationException { + } + + /* @param table + /* @param part + /* @param columns + /* @param readRequiredPriv + /* @param writeRequiredPriv + /* @throws HiveException + /* @throws AuthorizationException + * @see 
org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider#authorize(org.apache.hadoop.hive.ql.metadata.Table, org.apache.hadoop.hive.ql.metadata.Partition, java.util.List, org.apache.hadoop.hive.ql.security.authorization.Privilege[], org.apache.hadoop.hive.ql.security.authorization.Privilege[]) + */ + @Override + public void authorize(org.apache.hadoop.hive.ql.metadata.Table table, Partition part, List columns, + Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) + throws HiveException, AuthorizationException { + } + + } + + /** + * The Class DummyInputFormat is a dummy implementation of the old hadoop + * mapred.InputFormat required by HiveStorageHandler. + */ + class DummyInputFormat implements + InputFormat { + + /* + * @see + * org.apache.hadoop.mapred.InputFormat#getRecordReader(org.apache.hadoop + * .mapred.InputSplit, org.apache.hadoop.mapred.JobConf, + * org.apache.hadoop.mapred.Reporter) + */ + @Override + public RecordReader getRecordReader( + InputSplit split, JobConf jobconf, Reporter reporter) + throws IOException { + throw new IOException("This operation is not supported."); + } + + /* + * @see + * org.apache.hadoop.mapred.InputFormat#getSplits(org.apache.hadoop. + * mapred .JobConf, int) + */ + @Override + public InputSplit[] getSplits(JobConf jobconf, int number) + throws IOException { + throw new IOException("This operation is not supported."); + } + } + + /** + * The Class DummyOutputFormat is a dummy implementation of the old hadoop + * mapred.OutputFormat and HiveOutputFormat required by HiveStorageHandler. 
+ */ + class DummyOutputFormat implements + OutputFormat, HCatRecord>, + HiveOutputFormat, HCatRecord> { + + /* + * @see + * org.apache.hadoop.mapred.OutputFormat#checkOutputSpecs(org.apache + * .hadoop .fs.FileSystem, org.apache.hadoop.mapred.JobConf) + */ + @Override + public void checkOutputSpecs(FileSystem fs, JobConf jobconf) + throws IOException { + throw new IOException("This operation is not supported."); + + } + + /* + * @see + * org.apache.hadoop.mapred.OutputFormat#getRecordWriter(org.apache. + * hadoop .fs.FileSystem, org.apache.hadoop.mapred.JobConf, + * java.lang.String, org.apache.hadoop.util.Progressable) + */ + @Override + public RecordWriter, HCatRecord> getRecordWriter( + FileSystem fs, JobConf jobconf, String str, + Progressable progress) throws IOException { + throw new IOException("This operation is not supported."); + } + + /* + * @see + * org.apache.hadoop.hive.ql.io.HiveOutputFormat#getHiveRecordWriter(org + * .apache.hadoop.mapred.JobConf, org.apache.hadoop.fs.Path, + * java.lang.Class, boolean, java.util.Properties, + * org.apache.hadoop.util.Progressable) + */ + @Override + public org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter getHiveRecordWriter( + JobConf jc, Path finalOutPath, + Class valueClass, boolean isCompressed, + Properties tableProperties, Progressable progress) + throws IOException { + throw new IOException("This operation is not supported."); + } + + } + +} + + diff --git hcatalog/core/src/test/java/org/apache/hive/hcatalog/cli/TestEximSemanticAnalysis.java.broken hcatalog/core/src/test/java/org/apache/hive/hcatalog/cli/TestEximSemanticAnalysis.java.broken new file mode 100644 index 0000000..506a40e --- /dev/null +++ hcatalog/core/src/test/java/org/apache/hive/hcatalog/cli/TestEximSemanticAnalysis.java.broken @@ -0,0 +1,175 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hcatalog.cli;
+
+import java.io.IOException;
+import java.net.URI;
+
+import junit.framework.TestCase;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.hive.cli.CliSessionState;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
+import org.apache.hadoop.hive.metastore.Warehouse;
+import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.hive.ql.metadata.Hive;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
+import org.apache.hadoop.hive.ql.session.SessionState;
+import org.apache.hcatalog.MiniCluster;
+import org.apache.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzer;
+import org.apache.hcatalog.common.HCatConstants;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Exercises EXPORT/IMPORT statements through an {@code HCatDriver} against a
+ * {@code MiniCluster} file system, checking the response codes and the
+ * permission/group state of the table directory.
+ */
+public class TestEximSemanticAnalysis extends TestCase {
+
+  private final MiniCluster cluster = MiniCluster.buildCluster();
+  private HiveConf hcatConf;
+  private HCatDriver hcatDriver;
+  private Warehouse wh;
+  private static final Logger LOG = LoggerFactory.getLogger(TestEximSemanticAnalysis.class);
+
+  /**
+   * Builds a HiveConf that points the default file system and the warehouse
+   * directory at the mini cluster, installs HCatSemanticAnalyzer as the
+   * semantic-analyzer hook, and starts a CLI session with a fresh HCatDriver.
+   */
+  @Override
+  protected void setUp() throws Exception {
+
+    hcatConf = new HiveConf(this.getClass());
+    hcatConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, "");
+    hcatConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, "");
+    hcatConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false");
+    hcatConf.set(ConfVars.SEMANTIC_ANALYZER_HOOK.varname, HCatSemanticAnalyzer.class.getName());
+    hcatConf.set("fs.pfile.impl", "org.apache.hadoop.fs.ProxyLocalFileSystem");
+    URI fsuri = cluster.getFileSystem().getUri();
+    Path whPath = new Path(fsuri.getScheme(), fsuri.getAuthority(), "/user/hive/warehouse");
+    hcatConf.set(HiveConf.ConfVars.HADOOPFS.varname, fsuri.toString());
+    hcatConf.set(ConfVars.METASTOREWAREHOUSE.varname, whPath.toString());
+    wh = new Warehouse(hcatConf);
+    SessionState.start(new CliSessionState(hcatConf));
+
+    hcatDriver = new HCatDriver();
+  }
+
+  @Override
+  protected void tearDown() throws Exception {
+  }
+
+  /**
+   * Removes the local export scratch directory /tmp/hcat and blocks until the
+   * rm process has exited, so that a following export or import cannot race
+   * with a removal that is still running. The exit status of rm is not
+   * checked; the removal stays best-effort.
+   *
+   * @throws IOException if rm cannot be started or the wait is interrupted
+   */
+  private static void removeExportDir() throws IOException {
+    Process rm = Runtime.getRuntime().exec("rm -rf /tmp/hcat");
+    try {
+      rm.waitFor();
+    } catch (InterruptedException e) {
+      // Preserve the interrupt for callers higher up the stack.
+      Thread.currentThread().interrupt();
+      throw new IOException("Interrupted while removing /tmp/hcat", e);
+    }
+  }
+
+  /**
+   * Export must be rejected (response code 10, "Permission denied") when the
+   * table directory is owned by another user and not readable by others.
+   */
+  public void testExportPerms() throws IOException, MetaException, HiveException {
+
+    hcatDriver.run("drop table junit_sem_analysis");
+    CommandProcessorResponse response = hcatDriver
+      .run("create table junit_sem_analysis (a int) partitioned by (b string) stored as RCFILE");
+    assertEquals(0, response.getResponseCode());
+    Path whPath = wh.getTablePath(Hive.get(hcatConf).getDatabase("default"), "junit_sem_analysis");
+    cluster.getFileSystem().setPermission(whPath, FsPermission.valueOf("-rwxrwx-wx"));
+    cluster.getFileSystem().setOwner(whPath, "nosuchuser", "nosuchgroup");
+
+    removeExportDir();
+    response = hcatDriver
+      .run("export table junit_sem_analysis to 'pfile://local:9080/tmp/hcat/exports/junit_sem_analysis'");
+
+    assertEquals(10, response.getResponseCode());
+    assertTrue("Permission denied expected : "+response.getErrorMessage(),
+      response.getErrorMessage().startsWith(
+        "FAILED: Error in semantic analysis: org.apache.hcatalog.common.HCatException : 3000 : Permission denied"));
+    removeExportDir();
+    response = hcatDriver.run("drop table junit_sem_analysis");
+    if (response.getResponseCode() != 0) {
+      LOG.error(response.getErrorMessage());
+      fail("Drop table failed");
+    }
+  }
+
+  /**
+   * Import into an existing table must be rejected (response code 10,
+   * "Permission denied") when the table directory is owned by another user
+   * and not writable by others.
+   */
+  public void testImportPerms() throws IOException, MetaException, HiveException {
+
+    hcatDriver.run("drop table junit_sem_analysis");
+    CommandProcessorResponse response = hcatDriver
+      .run("create table junit_sem_analysis (a int) partitioned by (b string) stored as RCFILE");
+    assertEquals(0, response.getResponseCode());
+    removeExportDir();
+    response = hcatDriver
+      .run("export table junit_sem_analysis to 'pfile://local:9080/tmp/hcat/exports/junit_sem_analysis'");
+    assertEquals(0, response.getResponseCode());
+    response = hcatDriver.run("drop table junit_sem_analysis");
+    assertEquals(0, response.getResponseCode());
+    response = hcatDriver
+      .run("create table junit_sem_analysis (a int) partitioned by (b string) stored as RCFILE");
+    assertEquals(0, response.getResponseCode());
+    Path whPath = wh.getTablePath(Hive.get(hcatConf).getDatabase("default"), "junit_sem_analysis");
+    cluster.getFileSystem().setPermission(whPath, FsPermission.valueOf("-rwxrwxr-x"));
+    cluster.getFileSystem().setOwner(whPath, "nosuchuser", "nosuchgroup");
+
+    response = hcatDriver
+      .run("import table junit_sem_analysis from 'pfile://local:9080/tmp/hcat/exports/junit_sem_analysis'");
+
+    assertEquals(10, response.getResponseCode());
+    assertTrue(
+      "Permission denied expected: "+response.getErrorMessage() ,
+      response.getErrorMessage().startsWith(
+        "FAILED: Error in semantic analysis: org.apache.hcatalog.common.HCatException : 3000 : Permission denied"));
+    removeExportDir();
+
+    // Re-open the directory so the drop below is not blocked by the
+    // restrictive permissions set above.
+    cluster.getFileSystem().setPermission(whPath, FsPermission.valueOf("-rwxrwxrwx"));
+    response = hcatDriver.run("drop table junit_sem_analysis");
+    if (response.getResponseCode() != 0) {
+      LOG.error(response.getErrorMessage());
+      fail("Drop table failed");
+    }
+  }
+
+  /**
+   * Import into a new table must apply the permissions and group configured
+   * through HCatConstants.HCAT_PERMS / HCatConstants.HCAT_GROUP to the table
+   * directory.
+   */
+  public void testImportSetPermsGroup() throws IOException, MetaException, HiveException {
+
+    hcatDriver.run("drop table junit_sem_analysis");
+    hcatDriver.run("drop table junit_sem_analysis_imported");
+    CommandProcessorResponse response = hcatDriver
+      .run("create table junit_sem_analysis (a int) partitioned by (b string) stored as RCFILE");
+    assertEquals(0, response.getResponseCode());
+    removeExportDir();
+    response = hcatDriver
+      .run("export table junit_sem_analysis to 'pfile://local:9080/tmp/hcat/exports/junit_sem_analysis'");
+    assertEquals(0, response.getResponseCode());
+    response = hcatDriver.run("drop table junit_sem_analysis");
+    assertEquals(0, response.getResponseCode());
+
+    hcatConf.set(HCatConstants.HCAT_PERMS, "-rwxrw-r--");
+    hcatConf.set(HCatConstants.HCAT_GROUP, "nosuchgroup");
+
+    response = hcatDriver
+      .run("import table junit_sem_analysis_imported from 'pfile://local:9080/tmp/hcat/exports/junit_sem_analysis'");
+    assertEquals(0, response.getResponseCode());
+
+    Path whPath = wh.getTablePath(Hive.get(hcatConf).getDatabase("default"), "junit_sem_analysis_imported");
+    assertEquals(FsPermission.valueOf("-rwxrw-r--"), cluster.getFileSystem().getFileStatus(whPath).getPermission());
+    assertEquals("nosuchgroup", cluster.getFileSystem().getFileStatus(whPath).getGroup());
+
+    removeExportDir();
+
+    response = hcatDriver.run("drop table junit_sem_analysis_imported");
+    if (response.getResponseCode() != 0) {
+      LOG.error(response.getErrorMessage());
+      fail("Drop table failed");
+    }
+  }
+
+
+}
+
diff --git hcatalog/core/src/test/java/org/apache/hive/hcatalog/cli/TestPermsGrp.java hcatalog/core/src/test/java/org/apache/hive/hcatalog/cli/TestPermsGrp.java
new file mode 100644
index 0000000..a1a8b93
--- /dev/null
+++ hcatalog/core/src/test/java/org/apache/hive/hcatalog/cli/TestPermsGrp.java
@@ -0,0 +1,232 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license
agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hive.hcatalog.cli;
+
+import java.io.FileNotFoundException;
+import java.util.ArrayList;
+
+import junit.framework.TestCase;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
+import org.apache.hadoop.hive.metastore.MetaStoreUtils;
+import org.apache.hadoop.hive.metastore.Warehouse;
+import org.apache.hadoop.hive.metastore.api.AlreadyExistsException;
+import org.apache.hadoop.hive.metastore.api.Database;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.InvalidObjectException;
+import org.apache.hadoop.hive.metastore.api.InvalidOperationException;
+import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
+import org.apache.hadoop.hive.metastore.api.SerDeInfo;
+import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.hadoop.hive.metastore.api.Type;
+import org.apache.hadoop.hive.ql.metadata.Hive;
+import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.shims.ShimLoader;
+import org.apache.hive.hcatalog.ExitException;
+import org.apache.hive.hcatalog.NoExitSecurityManager;
+
+import org.apache.hive.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzer;
+import org.apache.hive.hcatalog.common.HCatConstants;
+import org.apache.thrift.TException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Checks how the HCat command line handles the "-p" (permissions) and "-g"
+ * (group) options on table creation, against a metastore started on a local
+ * thrift port. A NoExitSecurityManager is installed for the duration of each
+ * test; the assertions below expect HCatCli.main to surface its exit as an
+ * ExitException.
+ */
+public class TestPermsGrp extends TestCase {
+
+  // NOTE(review): this is an instance field, so it only guards against a
+  // repeated setUp() on the same instance, not across test instances.
+  private boolean isServerRunning = false;
+  private static final int msPort = 20101;
+  private HiveConf hcatConf;
+  private Warehouse clientWH;
+  private HiveMetaStoreClient msc;
+  private static final Logger LOG = LoggerFactory.getLogger(TestPermsGrp.class);
+
+  /** Restores the security manager that was active before setUp(). */
+  @Override
+  protected void tearDown() throws Exception {
+    System.setSecurityManager(securityManager);
+  }
+
+  /**
+   * Starts a metastore on {@link #msPort}, installs the NoExitSecurityManager
+   * and builds the client-side configuration, warehouse and metastore client.
+   */
+  @Override
+  protected void setUp() throws Exception {
+
+    if (isServerRunning) {
+      return;
+    }
+
+    MetaStoreUtils.startMetaStore(msPort, ShimLoader.getHadoopThriftAuthBridge());
+
+    isServerRunning = true;
+
+    // Remember the current manager so tearDown() can put it back.
+    securityManager = System.getSecurityManager();
+    System.setSecurityManager(new NoExitSecurityManager());
+
+    hcatConf = new HiveConf(this.getClass());
+    hcatConf.set("hive.metastore.local", "false");
+    hcatConf.setVar(HiveConf.ConfVars.METASTOREURIS, "thrift://127.0.0.1:" + msPort);
+    hcatConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES, 3);
+    hcatConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTFAILURERETRIES, 3);
+
+    hcatConf.set(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname, HCatSemanticAnalyzer.class.getName());
+    hcatConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, "");
+    hcatConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, "");
+    hcatConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false");
+    hcatConf.set(HiveConf.ConfVars.METASTORE_CLIENT_SOCKET_TIMEOUT.varname, "60");
+    clientWH = new Warehouse(hcatConf);
+    msc = new HiveMetaStoreClient(hcatConf, null);
+    System.setProperty(HiveConf.ConfVars.PREEXECHOOKS.varname, " ");
+    System.setProperty(HiveConf.ConfVars.POSTEXECHOOKS.varname, " ");
+  }
+
+
+  /**
+   * Walks through four cases: default permissions, valid custom permissions,
+   * permissions in an invalid format, and an invalid group name. For the two
+   * failing cases it also verifies that neither the table directory nor the
+   * table metadata was created.
+   *
+   * Negative paths use JUnit's fail() rather than the Java "assert" statement,
+   * because "assert false" is a no-op unless the JVM runs with -ea and would
+   * let a missing exception go unnoticed. fail() throws an Error, so it is not
+   * swallowed by the surrounding catch (Exception) clauses.
+   */
+  public void testCustomPerms() throws Exception {
+
+    String dbName = MetaStoreUtils.DEFAULT_DATABASE_NAME;
+    String tblName = "simptbl";
+    String typeName = "Person";
+
+    try {
+
+      // Let's first test for default permissions, this is the case when user specified nothing.
+      Table tbl = getTable(dbName, tblName, typeName);
+      msc.createTable(tbl);
+      Database db = Hive.get(hcatConf).getDatabase(dbName);
+      Path dfsPath = clientWH.getTablePath(db, tblName);
+      cleanupTbl(dbName, tblName, typeName);
+
+      // Next user did specify perms.
+      try {
+        HCatCli.main(new String[]{"-e", "create table simptbl (name string) stored as RCFILE", "-p", "rwx-wx---"});
+      } catch (Exception e) {
+        assertTrue(e instanceof ExitException);
+        assertEquals(0, ((ExitException) e).getStatus());
+      }
+      dfsPath = clientWH.getTablePath(db, tblName);
+      assertTrue(dfsPath.getFileSystem(hcatConf).getFileStatus(dfsPath).getPermission().equals(FsPermission.valueOf("drwx-wx---")));
+
+      cleanupTbl(dbName, tblName, typeName);
+
+      // User specified perms in invalid format.
+      hcatConf.set(HCatConstants.HCAT_PERMS, "rwx");
+      // make sure create table fails.
+      try {
+        HCatCli.main(new String[]{"-e", "create table simptbl (name string) stored as RCFILE", "-p", "rwx"});
+        fail("create table with malformed permissions should not return normally");
+      } catch (Exception me) {
+        assertTrue(me instanceof ExitException);
+      }
+      // No physical dir gets created.
+      dfsPath = clientWH.getTablePath(db, tblName);
+      try {
+        dfsPath.getFileSystem(hcatConf).getFileStatus(dfsPath);
+        fail("table directory should not exist after a failed create");
+      } catch (Exception fnfe) {
+        assertTrue(fnfe instanceof FileNotFoundException);
+      }
+
+      // And no metadata gets created.
+      try {
+        msc.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, tblName);
+        fail("table metadata should not exist after a failed create");
+      } catch (Exception e) {
+        assertTrue(e instanceof NoSuchObjectException);
+        assertEquals("default.simptbl table not found", e.getMessage());
+      }
+
+      // test for invalid group name
+      hcatConf.set(HCatConstants.HCAT_PERMS, "drw-rw-rw-");
+      hcatConf.set(HCatConstants.HCAT_GROUP, "THIS_CANNOT_BE_A_VALID_GRP_NAME_EVER");
+
+      try {
+        // create table must fail.
+        HCatCli.main(new String[]{"-e", "create table simptbl (name string) stored as RCFILE", "-p", "rw-rw-rw-", "-g", "THIS_CANNOT_BE_A_VALID_GRP_NAME_EVER"});
+        fail("create table with an invalid group should not return normally");
+      } catch (Exception me) {
+        assertTrue(me instanceof SecurityException);
+      }
+
+      try {
+        // no metadata should get created.
+        msc.getTable(dbName, tblName);
+        fail("table metadata should not exist after a failed create");
+      } catch (Exception e) {
+        assertTrue(e instanceof NoSuchObjectException);
+        assertEquals("default.simptbl table not found", e.getMessage());
+      }
+      try {
+        // neither dir should get created.
+        dfsPath.getFileSystem(hcatConf).getFileStatus(dfsPath);
+        fail("table directory should not exist after a failed create");
+      } catch (Exception e) {
+        assertTrue(e instanceof FileNotFoundException);
+      }
+
+    } catch (Exception e) {
+      LOG.error("testCustomPerms failed.", e);
+      throw e;
+    }
+  }
+
+  /**
+   * Drops every table of the given database; a missing database is ignored.
+   * Despite the name, the database itself is not dropped here.
+   */
+  private void silentDropDatabase(String dbName) throws MetaException, TException {
+    try {
+      for (String tableName : msc.getTables(dbName, "*")) {
+        msc.dropTable(dbName, tableName);
+      }
+
+    } catch (NoSuchObjectException e) {
+      // Nothing to clean up when the database does not exist.
+    }
+  }
+
+  /** Drops the given table and the given metastore type. */
+  private void cleanupTbl(String dbName, String tblName, String typeName) throws NoSuchObjectException, MetaException, TException, InvalidOperationException {
+
+    msc.dropTable(dbName, tblName);
+    msc.dropType(typeName);
+  }
+
+  /**
+   * Clears any previous table/type of the same name, registers a one-field
+   * ("name": string) type under typeName and returns an unsaved Table object
+   * whose storage descriptor uses that type's fields as columns.
+   */
+  private Table getTable(String dbName, String tblName, String typeName) throws NoSuchObjectException, MetaException, TException, AlreadyExistsException, InvalidObjectException {
+
+    msc.dropTable(dbName, tblName);
+    silentDropDatabase(dbName);
+
+
+    msc.dropType(typeName);
+    Type typ1 = new Type();
+    typ1.setName(typeName);
+    typ1.setFields(new ArrayList<FieldSchema>(1));
+    typ1.getFields().add(new FieldSchema("name", serdeConstants.STRING_TYPE_NAME, ""));
+    msc.createType(typ1);
+
+    Table tbl = new Table();
+    tbl.setDbName(dbName);
+    tbl.setTableName(tblName);
+    StorageDescriptor sd = new StorageDescriptor();
+    tbl.setSd(sd);
+    sd.setCols(typ1.getFields());
+
+    sd.setSerdeInfo(new SerDeInfo());
+    return tbl;
+  }
+
+
+  // Security manager that was installed before setUp(); restored in tearDown().
+  private SecurityManager securityManager;
+
+}
diff --git hcatalog/core/src/test/java/org/apache/hive/hcatalog/cli/TestSemanticAnalysis.java hcatalog/core/src/test/java/org/apache/hive/hcatalog/cli/TestSemanticAnalysis.java
new file mode 100644
index 0000000..4bfb7fc
--- /dev/null
+++ hcatalog/core/src/test/java/org/apache/hive/hcatalog/cli/TestSemanticAnalysis.java
@@ -0,0 +1,421 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hive.hcatalog.cli;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.hive.cli.CliSessionState;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.MetaStoreUtils;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
+import org.apache.hadoop.hive.metastore.api.Partition;
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.hadoop.hive.ql.CommandNeedRetryException;
+import org.apache.hadoop.hive.ql.Driver;
+import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat;
+import org.apache.hadoop.hive.ql.io.RCFileInputFormat;
+import org.apache.hadoop.hive.ql.io.RCFileOutputFormat;
+import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
+import org.apache.hadoop.hive.ql.session.SessionState;
+import org.apache.hadoop.mapred.TextInputFormat;
+import org.apache.hive.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzer;
+import org.apache.hive.hcatalog.mapreduce.HCatBaseTest;
+import org.apache.thrift.TException;
+import org.junit.Before;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Runs DDL statements through a Hive Driver that has HCatSemanticAnalyzer
+ * installed as its semantic-analyzer hook ({@code hcatDriver}) and checks the
+ * response codes and the resulting metastore state.
+ *
+ * {@code driver}, {@code client}, {@code hiveConf} and {@code TEST_DATA_DIR}
+ * are not declared in this class; presumably they are inherited from
+ * HCatBaseTest.
+ */
+public class TestSemanticAnalysis extends HCatBaseTest {
+
+  private static final Logger LOG = LoggerFactory.getLogger(TestSemanticAnalysis.class);
+  private static final String TBL_NAME = "junit_sem_analysis";
+
+  private Driver hcatDriver = null;
+  private String query;
+
+  /**
+   * Lazily creates the HCat-hooked driver from a copy of the base
+   * configuration and starts a CLI session for it.
+   */
+  @Before
+  public void setUpHCatDriver() throws IOException {
+    if (hcatDriver == null) {
+      HiveConf hcatConf = new HiveConf(hiveConf);
+      hcatConf.set(HiveConf.ConfVars.HIVEDEFAULTRCFILESERDE.varname,
+        "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe");
+      hcatConf.set(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname,
+        HCatSemanticAnalyzer.class.getName());
+      hcatDriver = new Driver(hcatConf);
+      SessionState.start(new CliSessionState(hcatConf));
+    }
+  }
+
+  /** "describe database" must succeed and report the mydb.db location. */
+  @Test
+  public void testDescDB() throws CommandNeedRetryException, IOException {
+    hcatDriver.run("drop database mydb cascade");
+    assertEquals(0, hcatDriver.run("create database mydb").getResponseCode());
+    CommandProcessorResponse resp = hcatDriver.run("describe database mydb");
+    assertEquals(0, resp.getResponseCode());
+    // Typed list: result.get(0).contains(..) needs String elements to compile.
+    ArrayList<String> result = new ArrayList<String>();
+    hcatDriver.getResults(result);
+    assertTrue(result.get(0).contains("mydb.db"));
+    hcatDriver.run("drop database mydb cascade");
+  }
+
+  /** An upper-case partition key in the DDL must be stored lower-cased. */
+  @Test
+  public void testCreateTblWithLowerCasePartNames() throws CommandNeedRetryException, MetaException, TException, NoSuchObjectException {
+    driver.run("drop table junit_sem_analysis");
+    CommandProcessorResponse resp = driver.run("create table junit_sem_analysis (a int) partitioned by (B string) stored as TEXTFILE");
+    assertEquals(0, resp.getResponseCode());
+    assertNull(resp.getErrorMessage());
+    Table tbl = client.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, TBL_NAME);
+    assertEquals("Partition key name case problem", "b", tbl.getPartitionKeys().get(0).getName());
+    driver.run("drop table junit_sem_analysis");
+  }
+
+  /**
+   * Changing the file format of one partition must change that partition's
+   * input/output formats and leave the table-level formats untouched.
+   */
+  @Test
+  public void testAlterTblFFpart() throws MetaException, TException, NoSuchObjectException, CommandNeedRetryException {
+
+    driver.run("drop table junit_sem_analysis");
+    driver.run("create table junit_sem_analysis (a int) partitioned by (b string) stored as TEXTFILE");
+    driver.run("alter table junit_sem_analysis add partition (b='2010-10-10')");
+    hcatDriver.run("alter table junit_sem_analysis partition (b='2010-10-10') set fileformat RCFILE");
+
+    Table tbl = client.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, TBL_NAME);
+    assertEquals(TextInputFormat.class.getName(), tbl.getSd().getInputFormat());
+    assertEquals(HiveIgnoreKeyTextOutputFormat.class.getName(), tbl.getSd().getOutputFormat());
+
+    List<String> partVals = new ArrayList<String>(1);
+    partVals.add("2010-10-10");
+    Partition part = client.getPartition(MetaStoreUtils.DEFAULT_DATABASE_NAME, TBL_NAME, partVals);
+
+    assertEquals(RCFileInputFormat.class.getName(), part.getSd().getInputFormat());
+    assertEquals(RCFileOutputFormat.class.getName(), part.getSd().getOutputFormat());
+
+    hcatDriver.run("drop table junit_sem_analysis");
+  }
+
+  /** "use" on a database that does not exist must return response code 1. */
+  @Test
+  public void testUsNonExistentDB() throws CommandNeedRetryException {
+    CommandProcessorResponse resp = hcatDriver.run("use no_such_db");
+    assertEquals(1, resp.getResponseCode());
+  }
+
+  /** Create / create-if-not-exists / drop of databases through the hooked driver. */
+  @Test
+  public void testDatabaseOperations() throws MetaException, CommandNeedRetryException {
+
+    List<String> dbs = client.getAllDatabases();
+    String testDb1 = "testdatabaseoperatons1";
+    String testDb2 = "testdatabaseoperatons2";
+
+    // Remove leftovers from an earlier run before creating the databases.
+    if (dbs.contains(testDb1.toLowerCase())) {
+      assertEquals(0, hcatDriver.run("drop database " + testDb1).getResponseCode());
+    }
+
+    if (dbs.contains(testDb2.toLowerCase())) {
+      assertEquals(0, hcatDriver.run("drop database " + testDb2).getResponseCode());
+    }
+
+    assertEquals(0, hcatDriver.run("create database " + testDb1).getResponseCode());
+    assertTrue(client.getAllDatabases().contains(testDb1));
+    assertEquals(0, hcatDriver.run("create database if not exists " + testDb1).getResponseCode());
+    assertTrue(client.getAllDatabases().contains(testDb1));
+    assertEquals(0, hcatDriver.run("create database if not exists " + testDb2).getResponseCode());
+    assertTrue(client.getAllDatabases().contains(testDb2));
+
+    assertEquals(0, hcatDriver.run("drop database " + testDb1).getResponseCode());
+    assertEquals(0, hcatDriver.run("drop database " + testDb2).getResponseCode());
+    assertFalse(client.getAllDatabases().contains(testDb1));
+    assertFalse(client.getAllDatabases().contains(testDb2));
+  }
+
+  /** "create table if not exists" on an existing table must succeed and change nothing. */
+  @Test
+  public void testCreateTableIfNotExists() throws MetaException, TException, NoSuchObjectException, CommandNeedRetryException {
+
+    hcatDriver.run("drop table " + TBL_NAME);
+    hcatDriver.run("create table junit_sem_analysis (a int) stored as RCFILE");
+    Table tbl = client.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, TBL_NAME);
+    List<FieldSchema> cols = tbl.getSd().getCols();
+    assertEquals(1, cols.size());
+    assertTrue(cols.get(0).equals(new FieldSchema("a", "int", null)));
+    assertEquals(RCFileInputFormat.class.getName(), tbl.getSd().getInputFormat());
+    assertEquals(RCFileOutputFormat.class.getName(), tbl.getSd().getOutputFormat());
+
+    CommandProcessorResponse resp = hcatDriver.run("create table if not exists junit_sem_analysis (a int) stored as RCFILE");
+    assertEquals(0, resp.getResponseCode());
+    assertNull(resp.getErrorMessage());
+    tbl = client.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, TBL_NAME);
+    cols = tbl.getSd().getCols();
+    assertEquals(1, cols.size());
+    assertTrue(cols.get(0).equals(new FieldSchema("a", "int", null)));
+    assertEquals(RCFileInputFormat.class.getName(), tbl.getSd().getInputFormat());
+    assertEquals(RCFileOutputFormat.class.getName(), tbl.getSd().getOutputFormat());
+
+    hcatDriver.run("drop table junit_sem_analysis");
+  }
+
+  /** "alter table ... touch" on the table must succeed. */
+  @Test
+  public void testAlterTblTouch() throws CommandNeedRetryException {
+
+    hcatDriver.run("drop table junit_sem_analysis");
+    hcatDriver.run("create table junit_sem_analysis (a int) partitioned by (b string) stored as RCFILE");
+    CommandProcessorResponse response = hcatDriver.run("alter table junit_sem_analysis touch");
+    assertEquals(0, response.getResponseCode());
+
+    // NOTE(review): the result of this second run is discarded, so the
+    // assertion below re-checks the response of the first statement. The
+    // partition b='12' is never added in this test; confirm the expected
+    // response code before assigning the result to "response".
+    hcatDriver.run("alter table junit_sem_analysis touch partition (b='12')");
+    assertEquals(0, response.getResponseCode());
+
+    hcatDriver.run("drop table junit_sem_analysis");
+  }
+
+  /** Column rename, type change and reorder must all be accepted. */
+  @Test
+  public void testChangeColumns() throws CommandNeedRetryException {
+    hcatDriver.run("drop table junit_sem_analysis");
+    hcatDriver.run("create table junit_sem_analysis (a int, c string) partitioned by (b string) stored as RCFILE");
+    CommandProcessorResponse response = hcatDriver.run("alter table junit_sem_analysis change a a1 int");
+    assertEquals(0, response.getResponseCode());
+
+    response = hcatDriver.run("alter table junit_sem_analysis change a1 a string");
+    assertEquals(0, response.getResponseCode());
+
+    response = hcatDriver.run("alter table junit_sem_analysis change a a int after c");
+    assertEquals(0, response.getResponseCode());
+    hcatDriver.run("drop table junit_sem_analysis");
+  }
+
+  /** "replace columns" followed by "add columns" must yield exactly (a1, d). */
+  @Test
+  public void testAddReplaceCols() throws IOException, MetaException, TException, NoSuchObjectException, CommandNeedRetryException {
+
+    hcatDriver.run("drop table junit_sem_analysis");
+    hcatDriver.run("create table junit_sem_analysis (a int, c string) partitioned by (b string) stored as RCFILE");
+    CommandProcessorResponse response = hcatDriver.run("alter table junit_sem_analysis replace columns (a1 tinyint)");
+    assertEquals(0, response.getResponseCode());
+
+    response = hcatDriver.run("alter table junit_sem_analysis add columns (d tinyint)");
+    assertEquals(0, response.getResponseCode());
+    assertNull(response.getErrorMessage());
+
+    response = hcatDriver.run("describe extended junit_sem_analysis");
+    assertEquals(0, response.getResponseCode());
+    Table tbl = client.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, TBL_NAME);
+    List<FieldSchema> cols = tbl.getSd().getCols();
+    assertEquals(2, cols.size());
+    assertTrue(cols.get(0).equals(new FieldSchema("a1", "tinyint", null)));
+    assertTrue(cols.get(1).equals(new FieldSchema("d", "tinyint", null)));
+    hcatDriver.run("drop table junit_sem_analysis");
+  }
+
+  /** "clustered by ... into N buckets" on an existing table must be accepted. */
+  @Test
+  public void testAlterTblClusteredBy() throws CommandNeedRetryException {
+
+    hcatDriver.run("drop table junit_sem_analysis");
+    hcatDriver.run("create table junit_sem_analysis (a int) partitioned by (b string) stored as RCFILE");
+    CommandProcessorResponse response = hcatDriver.run("alter table junit_sem_analysis clustered by (a) into 7 buckets");
+    assertEquals(0, response.getResponseCode());
+    hcatDriver.run("drop table junit_sem_analysis");
+  }
+
+  /** The table formats must still be the RCFile formats after "set fileformat". */
+  @Test
+  public void testAlterTableSetFF() throws IOException, MetaException, TException, NoSuchObjectException, CommandNeedRetryException {
+
+    hcatDriver.run("drop table junit_sem_analysis");
+    hcatDriver.run("create table junit_sem_analysis (a int) partitioned by (b string) stored as RCFILE");
+
+    Table tbl = client.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, TBL_NAME);
+    assertEquals(RCFileInputFormat.class.getName(), tbl.getSd().getInputFormat());
+    assertEquals(RCFileOutputFormat.class.getName(), tbl.getSd().getOutputFormat());
+
+    hcatDriver.run("alter table junit_sem_analysis set fileformat INPUTFORMAT 'org.apache.hadoop.hive.ql.io.RCFileInputFormat' OUTPUTFORMAT " +
+      "'org.apache.hadoop.hive.ql.io.RCFileOutputFormat' inputdriver 'mydriver' outputdriver 'yourdriver'");
+    hcatDriver.run("desc extended junit_sem_analysis");
+
+    tbl = client.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, TBL_NAME);
+    assertEquals(RCFileInputFormat.class.getName(), tbl.getSd().getInputFormat());
+    assertEquals(RCFileOutputFormat.class.getName(), tbl.getSd().getOutputFormat());
+
+    hcatDriver.run("drop table junit_sem_analysis");
+  }
+
+  // NOTE(review): despite the "Fail" in the name, this test asserts that the
+  // statement succeeds (response code 0).
+  @Test
+  public void testAddPartFail() throws CommandNeedRetryException {
+
+    driver.run("drop table junit_sem_analysis");
+    driver.run("create table junit_sem_analysis (a int) partitioned by (b string) stored as RCFILE");
+    CommandProcessorResponse response = hcatDriver.run("alter table junit_sem_analysis add partition (b='2') location 'README.txt'");
+    assertEquals(0, response.getResponseCode());
+    driver.run("drop table junit_sem_analysis");
+  }
+
+  /** Adding a partition that points at TEST_DATA_DIR must succeed without an error message. */
+  @Test
+  public void testAddPartPass() throws IOException, CommandNeedRetryException {
+
+    hcatDriver.run("drop table junit_sem_analysis");
+    hcatDriver.run("create table junit_sem_analysis (a int) partitioned by (b string) stored as RCFILE");
+    CommandProcessorResponse response = hcatDriver.run("alter table junit_sem_analysis add partition (b='2') location '" + TEST_DATA_DIR + "'");
+    assertEquals(0, response.getResponseCode());
+    assertNull(response.getErrorMessage());
+    hcatDriver.run("drop table junit_sem_analysis");
+  }
+
+  /** Create-table-as-select must be rejected with code 40000 and the expected message. */
+  @Test
+  public void testCTAS() throws CommandNeedRetryException {
+    hcatDriver.run("drop table junit_sem_analysis");
+    query = "create table junit_sem_analysis (a int) as select * from tbl2";
+    CommandProcessorResponse response = hcatDriver.run(query);
+    assertEquals(40000, response.getResponseCode());
+    assertTrue(response.getErrorMessage().contains("FAILED: SemanticException Operation not supported. Create table as Select is not a valid operation."));
+    hcatDriver.run("drop table junit_sem_analysis");
+  }
+
+  /** A create statement without a "stored as" clause must be accepted. */
+  @Test
+  public void testStoredAs() throws CommandNeedRetryException {
+    hcatDriver.run("drop table junit_sem_analysis");
+    query = "create table junit_sem_analysis (a int)";
+    CommandProcessorResponse response = hcatDriver.run(query);
+    assertEquals(0, response.getResponseCode());
+    hcatDriver.run("drop table junit_sem_analysis");
+  }
+
+  /** Explicit input/output formats plus inputdriver/outputdriver clauses must be accepted. */
+  @Test
+  public void testAddDriverInfo() throws IOException, MetaException, TException, NoSuchObjectException, CommandNeedRetryException {
+
+    hcatDriver.run("drop table junit_sem_analysis");
+    query = "create table junit_sem_analysis (a int) partitioned by (b string) stored as " +
+      "INPUTFORMAT 'org.apache.hadoop.hive.ql.io.RCFileInputFormat' OUTPUTFORMAT " +
+      "'org.apache.hadoop.hive.ql.io.RCFileOutputFormat' inputdriver 'mydriver' outputdriver 'yourdriver' ";
+    assertEquals(0, hcatDriver.run(query).getResponseCode());
+
+    Table tbl = client.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, TBL_NAME);
+    assertEquals(RCFileInputFormat.class.getName(), tbl.getSd().getInputFormat());
+    assertEquals(RCFileOutputFormat.class.getName(), tbl.getSd().getOutputFormat());
+
+    hcatDriver.run("drop table junit_sem_analysis");
+  }
+
+  /** A non-string partition column must be rejected with code 40000 and the expected message. */
+  @Test
+  public void testInvalidateNonStringPartition() throws IOException, CommandNeedRetryException {
+
+    hcatDriver.run("drop table junit_sem_analysis");
+    query = "create table junit_sem_analysis (a int) partitioned by (b int) stored as RCFILE";
+
+    CommandProcessorResponse response = hcatDriver.run(query);
+    assertEquals(40000, response.getResponseCode());
+    assertEquals("FAILED: SemanticException Operation not supported. HCatalog only supports partition columns of type string. For column: b Found type: int",
+      response.getErrorMessage());
+
+  }
+
+  /** "stored as SEQUENCEFILE" must be accepted. */
+  @Test
+  public void testInvalidateSeqFileStoredAs() throws IOException, CommandNeedRetryException {
+
+    hcatDriver.run("drop table junit_sem_analysis");
+    query = "create table junit_sem_analysis (a int) partitioned by (b string) stored as SEQUENCEFILE";
+
+    CommandProcessorResponse response = hcatDriver.run(query);
+    assertEquals(0, response.getResponseCode());
+
+  }
+
+  /** "stored as TEXTFILE" must be accepted. */
+  @Test
+  public void testInvalidateTextFileStoredAs() throws IOException, CommandNeedRetryException {
+
+    hcatDriver.run("drop table junit_sem_analysis");
+    query = "create table junit_sem_analysis (a int) partitioned by (b string) stored as TEXTFILE";
+
+    CommandProcessorResponse response = hcatDriver.run(query);
+    assertEquals(0, response.getResponseCode());
+
+  }
+
+  /** A "clustered by" clause at create time must be accepted. */
+  @Test
+  public void testInvalidateClusteredBy() throws IOException, CommandNeedRetryException {
+
+    hcatDriver.run("drop table junit_sem_analysis");
+    query = "create table junit_sem_analysis (a int) partitioned by (b string) clustered by (a) into 10 buckets stored as TEXTFILE";
+
+    CommandProcessorResponse response = hcatDriver.run(query);
+    assertEquals(0, response.getResponseCode());
+  }
+
+  // NOTE(review): despite the "Fail" in the name, this test asserts that
+  // "create table ... like" succeeds (response code 0).
+  @Test
+  public void testCTLFail() throws IOException, CommandNeedRetryException {
+
+    driver.run("drop table junit_sem_analysis");
+    driver.run("drop table like_table");
+    query = "create table junit_sem_analysis (a int) partitioned by (b string) stored as RCFILE";
+
+    driver.run(query);
+    query = "create table like_table like junit_sem_analysis";
+    CommandProcessorResponse response = hcatDriver.run(query);
+    assertEquals(0, response.getResponseCode());
+  }
+
+  /** "create table ... like" through the hooked driver must succeed. */
+  @Test
+  public void testCTLPass() throws IOException, MetaException, TException, NoSuchObjectException, CommandNeedRetryException {
+
+    try {
+      hcatDriver.run("drop table junit_sem_analysis");
+    } catch (Exception e) {
+      LOG.error("Error in drop table.", e);
+    }
+    query = "create table junit_sem_analysis (a int) partitioned by (b string) stored as RCFILE";
+
+    hcatDriver.run(query);
+    String likeTbl = "like_table";
+    hcatDriver.run("drop table " + likeTbl);
+    query = "create table like_table like junit_sem_analysis";
+    CommandProcessorResponse resp = hcatDriver.run(query);
+    assertEquals(0, resp.getResponseCode());
+//    Table tbl = client.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, likeTbl);
+//    assertEquals(likeTbl,tbl.getTableName());
+//    List cols = tbl.getSd().getCols();
+//    assertEquals(1, cols.size());
+//    assertEquals(new FieldSchema("a", "int", null), cols.get(0));
+//    assertEquals("org.apache.hadoop.hive.ql.io.RCFileInputFormat",tbl.getSd().getInputFormat());
+//    assertEquals("org.apache.hadoop.hive.ql.io.RCFileOutputFormat",tbl.getSd().getOutputFormat());
+//    Map tblParams = tbl.getParameters();
+//    assertEquals("org.apache.hadoop.hive.hcat.rcfile.RCFileInputStorageDriver", tblParams.get("hcat.isd"));
+//    assertEquals("org.apache.hadoop.hive.hcat.rcfile.RCFileOutputStorageDriver", tblParams.get("hcat.osd"));
+//
+//    hcatDriver.run("drop table junit_sem_analysis");
+//    hcatDriver.run("drop table "+likeTbl);
+  }
+
+// This test case currently fails, since add partitions don't inherit anything from tables.
+
+//  public void testAddPartInheritDrivers() throws MetaException, TException, NoSuchObjectException{
+//
+//    hcatDriver.run("drop table "+TBL_NAME);
+//    hcatDriver.run("create table junit_sem_analysis (a int) partitioned by (b string) stored as RCFILE");
+//    hcatDriver.run("alter table "+TBL_NAME+" add partition (b='2010-10-10')");
+//
+//    List partVals = new ArrayList(1);
+//    partVals.add("2010-10-10");
+//
+//    Map map = client.getPartition(MetaStoreUtils.DEFAULT_DATABASE_NAME, TBL_NAME, partVals).getParameters();
+//    assertEquals(map.get(InitializeInput.HOWL_ISD_CLASS), RCFileInputStorageDriver.class.getName());
+//    assertEquals(map.get(InitializeInput.HOWL_OSD_CLASS), RCFileOutputStorageDriver.class.getName());
+//  }
+}
diff --git hcatalog/core/src/test/java/org/apache/hive/hcatalog/cli/TestStorageHandlerProperties.java.broken hcatalog/core/src/test/java/org/apache/hive/hcatalog/cli/TestStorageHandlerProperties.java.broken
new file mode 100644
index 0000000..7612337
--- /dev/null
+++ hcatalog/core/src/test/java/org/apache/hive/hcatalog/cli/TestStorageHandlerProperties.java.broken
@@ -0,0 +1,86 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */ + +package org.apache.hcatalog.cli; + +import static org.junit.Assert.assertEquals; + +import org.apache.hadoop.hive.cli.CliSessionState; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; +import org.apache.hadoop.hive.metastore.MetaStoreUtils; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.ql.CommandNeedRetryException; +import org.apache.hadoop.hive.ql.Driver; +import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; +import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzer; +import org.apache.hcatalog.common.HCatConstants; +import org.apache.thrift.TException; + +import junit.framework.TestCase; + +public class TestStorageHandlerProperties extends TestCase { + + private Driver hcatDriver; + private Driver hiveDriver; + private HiveMetaStoreClient msc; + + protected void setUp() throws Exception { + HiveConf hcatConf = new HiveConf(this.getClass()); + hcatConf.set(ConfVars.PREEXECHOOKS.varname, ""); + hcatConf.set(ConfVars.POSTEXECHOOKS.varname, ""); + hcatConf.set(ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); + + HiveConf hiveConf = new HiveConf(hcatConf,this.getClass()); + hiveDriver = new Driver(hiveConf); + + hcatConf.set(ConfVars.SEMANTIC_ANALYZER_HOOK.varname, HCatSemanticAnalyzer.class.getName()); + hcatDriver = new Driver(hcatConf); + + msc = new HiveMetaStoreClient(hcatConf); + SessionState.start(new CliSessionState(hcatConf)); + } + + public void testTableProperties() throws CommandNeedRetryException, MetaException ,TException, NoSuchObjectException{ + hcatDriver.run("drop table test_table"); + CommandProcessorResponse response = hcatDriver + .run("create table test_table(key int, value 
string) STORED BY " + + "'org.apache.hcatalog.cli.DummyStorageHandler' "); + + assertEquals(0, response.getResponseCode()); + Table tbl = msc.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, "test_table"); + DummyStorageHandler dsh = new DummyStorageHandler(); + assertTrue(tbl.getParameters().containsKey(HCatConstants.HCAT_ISD_CLASS)); + assertTrue(tbl.getParameters().containsKey(HCatConstants.HCAT_OSD_CLASS)); + assertEquals(tbl.getParameters().get(HCatConstants.HCAT_ISD_CLASS), dsh.getInputStorageDriver().getName()); + assertEquals(tbl.getParameters().get(HCatConstants.HCAT_OSD_CLASS), dsh.getOutputStorageDriver().getName()); + } + + /* @throws java.lang.Exception + * @see junit.framework.TestCase#tearDown() + */ + protected void tearDown() throws Exception { + super.tearDown(); + } + +} diff --git hcatalog/core/src/test/java/org/apache/hive/hcatalog/cli/TestUseDatabase.java hcatalog/core/src/test/java/org/apache/hive/hcatalog/cli/TestUseDatabase.java new file mode 100644 index 0000000..42442ae --- /dev/null +++ hcatalog/core/src/test/java/org/apache/hive/hcatalog/cli/TestUseDatabase.java @@ -0,0 +1,77 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.hive.hcatalog.cli; + +import java.io.IOException; + +import junit.framework.TestCase; + +import org.apache.hadoop.hive.cli.CliSessionState; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import org.apache.hadoop.hive.ql.CommandNeedRetryException; +import org.apache.hadoop.hive.ql.Driver; +import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; +import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.hive.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzer; + +/* Unit test for GitHub Howl issue #3 */ +public class TestUseDatabase extends TestCase { + + private Driver hcatDriver; + + @Override + protected void setUp() throws Exception { + + HiveConf hcatConf = new HiveConf(this.getClass()); + hcatConf.set(ConfVars.PREEXECHOOKS.varname, ""); + hcatConf.set(ConfVars.POSTEXECHOOKS.varname, ""); + hcatConf.set(ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); + + hcatConf.set(ConfVars.SEMANTIC_ANALYZER_HOOK.varname, HCatSemanticAnalyzer.class.getName()); + hcatDriver = new Driver(hcatConf); + SessionState.start(new CliSessionState(hcatConf)); + } + + String query; + private final String dbName = "testUseDatabase_db"; + private final String tblName = "testUseDatabase_tbl"; + + public void testAlterTablePass() throws IOException, CommandNeedRetryException { + + hcatDriver.run("create database " + dbName); + hcatDriver.run("use " + dbName); + hcatDriver.run("create table " + tblName + " (a int) partitioned by (b string) stored as RCFILE"); + + CommandProcessorResponse response; + + response = hcatDriver.run("alter table " + tblName + " add partition (b='2') location '/tmp'"); + assertEquals(0, response.getResponseCode()); + assertNull(response.getErrorMessage()); + + response = hcatDriver.run("alter table " + tblName + " set fileformat INPUTFORMAT 'org.apache.hadoop.hive.ql.io.RCFileInputFormat' OUTPUTFORMAT " + + 
"'org.apache.hadoop.hive.ql.io.RCFileOutputFormat' inputdriver 'mydriver' outputdriver 'yourdriver'"); + assertEquals(0, response.getResponseCode()); + assertNull(response.getErrorMessage()); + + hcatDriver.run("drop table " + tblName); + hcatDriver.run("drop database " + dbName); + } + +} diff --git hcatalog/core/src/test/java/org/apache/hive/hcatalog/common/TestHCatUtil.java hcatalog/core/src/test/java/org/apache/hive/hcatalog/common/TestHCatUtil.java new file mode 100644 index 0000000..8082d4a --- /dev/null +++ hcatalog/core/src/test/java/org/apache/hive/hcatalog/common/TestHCatUtil.java @@ -0,0 +1,183 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.hive.hcatalog.common; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import org.apache.hadoop.fs.permission.FsAction; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.hive.metastore.TableType; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.Order; +import org.apache.hadoop.hive.metastore.api.SerDeInfo; +import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hive.hcatalog.data.schema.HCatFieldSchema; +import org.apache.hive.hcatalog.data.schema.HCatSchema; +import org.junit.Assert; +import org.junit.Test; + +public class TestHCatUtil { + + @Test + public void testFsPermissionOperation() { + + HashMap permsCode = new HashMap(); + + for (int i = 0; i < 8; i++) { + for (int j = 0; j < 8; j++) { + for (int k = 0; k < 8; k++) { + StringBuilder sb = new StringBuilder(); + sb.append("0"); + sb.append(i); + sb.append(j); + sb.append(k); + Integer code = (((i * 8) + j) * 8) + k; + String perms = (new FsPermission(Short.decode(sb.toString()))).toString(); + if (permsCode.containsKey(perms)) { + Assert.assertEquals("permissions(" + perms + ") mapped to multiple codes", code, permsCode.get(perms)); + } + permsCode.put(perms, code); + assertFsPermissionTransformationIsGood(perms); + } + } + } + } + + private void assertFsPermissionTransformationIsGood(String perms) { + Assert.assertEquals(perms, FsPermission.valueOf("-" + perms).toString()); + } + + @Test + public void testValidateMorePermissive() { + assertConsistentFsPermissionBehaviour(FsAction.ALL, true, true, true, true, true, true, true, true); + assertConsistentFsPermissionBehaviour(FsAction.READ, false, true, false, 
true, false, false, false, false); + assertConsistentFsPermissionBehaviour(FsAction.WRITE, false, true, false, false, true, false, false, false); + assertConsistentFsPermissionBehaviour(FsAction.EXECUTE, false, true, true, false, false, false, false, false); + assertConsistentFsPermissionBehaviour(FsAction.READ_EXECUTE, false, true, true, true, false, true, false, false); + assertConsistentFsPermissionBehaviour(FsAction.READ_WRITE, false, true, false, true, true, false, true, false); + assertConsistentFsPermissionBehaviour(FsAction.WRITE_EXECUTE, false, true, true, false, true, false, false, true); + assertConsistentFsPermissionBehaviour(FsAction.NONE, false, true, false, false, false, false, false, false); + } + + + private void assertConsistentFsPermissionBehaviour( + FsAction base, boolean versusAll, boolean versusNone, + boolean versusX, boolean versusR, boolean versusW, + boolean versusRX, boolean versusRW, boolean versusWX) { + + Assert.assertTrue(versusAll == HCatUtil.validateMorePermissive(base, FsAction.ALL)); + Assert.assertTrue(versusX == HCatUtil.validateMorePermissive(base, FsAction.EXECUTE)); + Assert.assertTrue(versusNone == HCatUtil.validateMorePermissive(base, FsAction.NONE)); + Assert.assertTrue(versusR == HCatUtil.validateMorePermissive(base, FsAction.READ)); + Assert.assertTrue(versusRX == HCatUtil.validateMorePermissive(base, FsAction.READ_EXECUTE)); + Assert.assertTrue(versusRW == HCatUtil.validateMorePermissive(base, FsAction.READ_WRITE)); + Assert.assertTrue(versusW == HCatUtil.validateMorePermissive(base, FsAction.WRITE)); + Assert.assertTrue(versusWX == HCatUtil.validateMorePermissive(base, FsAction.WRITE_EXECUTE)); + } + + @Test + public void testExecutePermissionsCheck() { + Assert.assertTrue(HCatUtil.validateExecuteBitPresentIfReadOrWrite(FsAction.ALL)); + Assert.assertTrue(HCatUtil.validateExecuteBitPresentIfReadOrWrite(FsAction.NONE)); + Assert.assertTrue(HCatUtil.validateExecuteBitPresentIfReadOrWrite(FsAction.EXECUTE)); + 
Assert.assertTrue(HCatUtil.validateExecuteBitPresentIfReadOrWrite(FsAction.READ_EXECUTE)); + Assert.assertTrue(HCatUtil.validateExecuteBitPresentIfReadOrWrite(FsAction.WRITE_EXECUTE)); + + Assert.assertFalse(HCatUtil.validateExecuteBitPresentIfReadOrWrite(FsAction.READ)); + Assert.assertFalse(HCatUtil.validateExecuteBitPresentIfReadOrWrite(FsAction.WRITE)); + Assert.assertFalse(HCatUtil.validateExecuteBitPresentIfReadOrWrite(FsAction.READ_WRITE)); + + } + + @Test + public void testGetTableSchemaWithPtnColsApi() throws IOException { + // Check the schema of a table with one field & no partition keys. + StorageDescriptor sd = new StorageDescriptor( + Lists.newArrayList(new FieldSchema("username", serdeConstants.STRING_TYPE_NAME, null)), + "location", "org.apache.hadoop.mapred.TextInputFormat", + "org.apache.hadoop.mapred.TextOutputFormat", false, -1, new SerDeInfo(), + new ArrayList(), new ArrayList(), new HashMap()); + org.apache.hadoop.hive.metastore.api.Table apiTable = + new org.apache.hadoop.hive.metastore.api.Table("test_tblname", "test_dbname", "test_owner", + 0, 0, 0, sd, new ArrayList(), new HashMap(), + "viewOriginalText", "viewExpandedText", TableType.EXTERNAL_TABLE.name()); + Table table = new Table(apiTable); + + List expectedHCatSchema = + Lists.newArrayList(new HCatFieldSchema("username", HCatFieldSchema.Type.STRING, null)); + + Assert.assertEquals(new HCatSchema(expectedHCatSchema), + HCatUtil.getTableSchemaWithPtnCols(table)); + + // Add a partition key & ensure its reflected in the schema. + List partitionKeys = + Lists.newArrayList(new FieldSchema("dt", serdeConstants.STRING_TYPE_NAME, null)); + table.getTTable().setPartitionKeys(partitionKeys); + expectedHCatSchema.add(new HCatFieldSchema("dt", HCatFieldSchema.Type.STRING, null)); + Assert.assertEquals(new HCatSchema(expectedHCatSchema), + HCatUtil.getTableSchemaWithPtnCols(table)); + } + + /** + * Hive represents tables in two ways: + *
<ul>
+ * <li>org.apache.hadoop.hive.metastore.api.Table - exactly what's stored in the metastore</li>
+ * <li>org.apache.hadoop.hive.ql.metadata.Table - adds business logic over api.Table</li>
+ * </ul>
+ * Here we check SerDe-reported fields are included in the table schema. + */ + @Test + public void testGetTableSchemaWithPtnColsSerDeReportedFields() throws IOException { + Map parameters = Maps.newHashMap(); + parameters.put(serdeConstants.SERIALIZATION_CLASS, + "org.apache.hadoop.hive.serde2.thrift.test.IntString"); + parameters.put(serdeConstants.SERIALIZATION_FORMAT, "org.apache.thrift.protocol.TBinaryProtocol"); + + SerDeInfo serDeInfo = new SerDeInfo(null, + "org.apache.hadoop.hive.serde2.thrift.ThriftDeserializer", parameters); + + // StorageDescriptor has an empty list of fields - SerDe will report them. + StorageDescriptor sd = new StorageDescriptor(new ArrayList(), "location", + "org.apache.hadoop.mapred.TextInputFormat", "org.apache.hadoop.mapred.TextOutputFormat", + false, -1, serDeInfo, new ArrayList(), new ArrayList(), + new HashMap()); + + org.apache.hadoop.hive.metastore.api.Table apiTable = + new org.apache.hadoop.hive.metastore.api.Table("test_tblname", "test_dbname", "test_owner", + 0, 0, 0, sd, new ArrayList(), new HashMap(), + "viewOriginalText", "viewExpandedText", TableType.EXTERNAL_TABLE.name()); + Table table = new Table(apiTable); + + List expectedHCatSchema = Lists.newArrayList( + new HCatFieldSchema("myint", HCatFieldSchema.Type.INT, null), + new HCatFieldSchema("mystring", HCatFieldSchema.Type.STRING, null), + new HCatFieldSchema("underscore_int", HCatFieldSchema.Type.INT, null)); + + Assert.assertEquals(new HCatSchema(expectedHCatSchema), + HCatUtil.getTableSchemaWithPtnCols(table)); + } +} diff --git hcatalog/core/src/test/java/org/apache/hive/hcatalog/common/TestHiveClientCache.java hcatalog/core/src/test/java/org/apache/hive/hcatalog/common/TestHiveClientCache.java new file mode 100644 index 0000000..e701f61 --- /dev/null +++ hcatalog/core/src/test/java/org/apache/hive/hcatalog/common/TestHiveClientCache.java @@ -0,0 +1,267 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license 
agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.common; + +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.HiveMetaStore; +import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; +import org.apache.hadoop.hive.metastore.api.AlreadyExistsException; +import org.apache.hadoop.hive.metastore.api.Database; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.InvalidObjectException; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; +import org.apache.hadoop.hive.metastore.api.SerDeInfo; +import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hive.hcatalog.NoExitSecurityManager; +import org.apache.hive.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzer; +import org.apache.thrift.TException; +import org.junit.AfterClass; +import org.junit.BeforeClass; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNotSame; +import static org.junit.Assert.assertTrue; 
+import static org.junit.Assert.fail; + +import org.junit.Ignore; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.security.auth.login.LoginException; +import java.io.IOException; +import java.math.BigInteger; +import java.util.ArrayList; +import java.util.List; +import java.util.Random; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; + +public class TestHiveClientCache { + + private static final Logger LOG = LoggerFactory.getLogger(TestHiveClientCache.class); + final HiveConf hiveConf = new HiveConf(); + + @BeforeClass + public static void setUp() throws Exception { + } + + @AfterClass + public static void tearDown() throws Exception { + } + + @Test + public void testCacheHit() throws IOException, MetaException, LoginException { + + HiveClientCache cache = new HiveClientCache(1000); + HiveMetaStoreClient client = cache.get(hiveConf); + assertNotNull(client); + client.close(); // close shouldn't matter + + // Setting a non important configuration should return the same client only + hiveConf.setIntVar(HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTS, 10); + HiveMetaStoreClient client2 = cache.get(hiveConf); + assertNotNull(client2); + assertEquals(client, client2); + client2.close(); + } + + @Test + public void testCacheMiss() throws IOException, MetaException, LoginException { + HiveClientCache cache = new HiveClientCache(1000); + HiveMetaStoreClient client = cache.get(hiveConf); + assertNotNull(client); + + // Set different uri as it is one of the criteria deciding whether to return the same client or not + hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, " "); // URIs are checked for string equivalence, even spaces make them different + HiveMetaStoreClient client2 = cache.get(hiveConf); + assertNotNull(client2); + assertNotSame(client, client2); + } + + /** + 
* Check that a new client is returned for the same configuration after the expiry time. + * Also verify that the expiry time configuration is honoured + */ + @Test + public void testCacheExpiry() throws IOException, MetaException, LoginException, InterruptedException { + HiveClientCache cache = new HiveClientCache(1); + HiveClientCache.CacheableHiveMetaStoreClient client = (HiveClientCache.CacheableHiveMetaStoreClient) cache.get(hiveConf); + assertNotNull(client); + + Thread.sleep(2500); + HiveMetaStoreClient client2 = cache.get(hiveConf); + client.close(); + assertTrue(client.isClosed()); // close() after *expiry time* and *a cache access* should have tore down the client + + assertNotNull(client2); + assertNotSame(client, client2); + } + + /** + * Check that a *new* client is created if asked from different threads even with + * the same hive configuration + * @throws ExecutionException + * @throws InterruptedException + */ + @Test + public void testMultipleThreadAccess() throws ExecutionException, InterruptedException { + final HiveClientCache cache = new HiveClientCache(1000); + + class GetHiveClient implements Callable { + @Override + public HiveMetaStoreClient call() throws IOException, MetaException, LoginException { + return cache.get(hiveConf); + } + } + + ExecutorService executor = Executors.newFixedThreadPool(2); + + Callable worker1 = new GetHiveClient(); + Callable worker2 = new GetHiveClient(); + Future clientFuture1 = executor.submit(worker1); + Future clientFuture2 = executor.submit(worker2); + HiveMetaStoreClient client1 = clientFuture1.get(); + HiveMetaStoreClient client2 = clientFuture2.get(); + assertNotNull(client1); + assertNotNull(client2); + assertNotSame(client1, client2); + } + + @Test + public void testCloseAllClients() throws IOException, MetaException, LoginException { + final HiveClientCache cache = new HiveClientCache(1000); + HiveClientCache.CacheableHiveMetaStoreClient client1 = (HiveClientCache.CacheableHiveMetaStoreClient) 
cache.get(hiveConf); + hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, " "); // URIs are checked for string equivalence, even spaces make them different + HiveClientCache.CacheableHiveMetaStoreClient client2 = (HiveClientCache.CacheableHiveMetaStoreClient) cache.get(hiveConf); + cache.closeAllClientsQuietly(); + assertTrue(client1.isClosed()); + assertTrue(client2.isClosed()); + } + + /** + * Test that a long table name actually breaks the HMSC. Subsequently check that isOpen() reflects + * and tells if the client is broken + */ + @Ignore("hangs indefinitely") + @Test + public void testHMSCBreakability() throws IOException, MetaException, LoginException, TException, AlreadyExistsException, + InvalidObjectException, NoSuchObjectException, InterruptedException { + // Setup + LocalMetaServer metaServer = new LocalMetaServer(); + metaServer.start(); + + final HiveClientCache cache = new HiveClientCache(1000); + HiveClientCache.CacheableHiveMetaStoreClient client = + (HiveClientCache.CacheableHiveMetaStoreClient) cache.get(metaServer.getHiveConf()); + + assertTrue(client.isOpen()); + + final String DB_NAME = "test_db"; + final String LONG_TABLE_NAME = "long_table_name_" + new BigInteger(200, new Random()).toString(2); + + try { + client.dropTable(DB_NAME, LONG_TABLE_NAME); + } catch (Exception e) { + } + try { + client.dropDatabase(DB_NAME); + } catch (Exception e) { + } + + client.createDatabase(new Database(DB_NAME, "", null, null)); + + List fields = new ArrayList(); + fields.add(new FieldSchema("colname", serdeConstants.STRING_TYPE_NAME, "")); + Table tbl = new Table(); + tbl.setDbName(DB_NAME); + tbl.setTableName(LONG_TABLE_NAME); + StorageDescriptor sd = new StorageDescriptor(); + sd.setCols(fields); + tbl.setSd(sd); + sd.setSerdeInfo(new SerDeInfo()); + + // Break the client + try { + client.createTable(tbl); + fail("Exception was expected while creating table with long name"); + } catch (Exception e) { + } + + assertFalse(client.isOpen()); + 
metaServer.shutDown(); + } + + private static class LocalMetaServer implements Runnable { + public final int MS_PORT = 20101; + private final HiveConf hiveConf; + private final SecurityManager securityManager; + public final static int WAIT_TIME_FOR_BOOTUP = 30000; + + public LocalMetaServer() { + securityManager = System.getSecurityManager(); + System.setSecurityManager(new NoExitSecurityManager()); + hiveConf = new HiveConf(TestHiveClientCache.class); + hiveConf.set("hive.metastore.local", "false"); + hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, "thrift://localhost:" + + MS_PORT); + hiveConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES, 3); + hiveConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTFAILURERETRIES, 3); + hiveConf.set(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname, + HCatSemanticAnalyzer.class.getName()); + hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, + "false"); + System.setProperty(HiveConf.ConfVars.PREEXECHOOKS.varname, " "); + System.setProperty(HiveConf.ConfVars.POSTEXECHOOKS.varname, " "); + } + + public void start() throws InterruptedException { + Thread thread = new Thread(this); + thread.start(); + Thread.sleep(WAIT_TIME_FOR_BOOTUP); // Wait for the server to bootup + } + + @Override + public void run() { + try { + HiveMetaStore.main(new String[]{"-v", "-p", String.valueOf(MS_PORT)}); + } catch (Throwable t) { + LOG.error("Exiting. 
Got exception from metastore: ", t); + } + } + + public HiveConf getHiveConf() { + return hiveConf; + } + + public void shutDown() { + System.setSecurityManager(securityManager); + } + } +} diff --git hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/HCatDataCheckUtil.java hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/HCatDataCheckUtil.java new file mode 100644 index 0000000..fe6a014 --- /dev/null +++ hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/HCatDataCheckUtil.java @@ -0,0 +1,114 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.hive.hcatalog.data; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map.Entry; + +import org.apache.hadoop.hive.cli.CliSessionState; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.CommandNeedRetryException; +import org.apache.hadoop.hive.ql.Driver; +import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.hive.hcatalog.MiniCluster; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Helper class for Other Data Testers + */ +public class HCatDataCheckUtil { + + private static final Logger LOG = LoggerFactory.getLogger(HCatDataCheckUtil.class); + + public static Driver instantiateDriver(MiniCluster cluster) { + HiveConf hiveConf = new HiveConf(HCatDataCheckUtil.class); + for (Entry e : cluster.getProperties().entrySet()) { + hiveConf.set(e.getKey().toString(), e.getValue().toString()); + } + hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); + + LOG.debug("Hive conf : {}", hiveConf.getAllProperties()); + Driver driver = new Driver(hiveConf); + SessionState.start(new CliSessionState(hiveConf)); + return driver; + } + + public static void generateDataFile(MiniCluster cluster, String fileName) throws IOException { + MiniCluster.deleteFile(cluster, fileName); + String[] input = new String[50]; + for (int i = 0; i < 50; i++) { + input[i] = (i % 5) + "\t" + i + "\t" + "_S" + i + "S_"; + } + MiniCluster.createInputFile(cluster, fileName, input); + } + + public static void createTable(Driver driver, String tableName, String createTableArgs) + throws CommandNeedRetryException, IOException { + String createTable = "create table " + tableName + createTableArgs; + int retCode = driver.run(createTable).getResponseCode(); + if (retCode != 0) { + throw new IOException("Failed to create table. 
[" + createTable + "], return code from hive driver : [" + retCode + "]"); + } + } + + public static void dropTable(Driver driver, String tablename) throws IOException, CommandNeedRetryException { + driver.run("drop table if exists " + tablename); + } + + public static ArrayList formattedRun(Driver driver, String name, String selectCmd) + throws CommandNeedRetryException, IOException { + driver.run(selectCmd); + ArrayList src_values = new ArrayList(); + driver.getResults(src_values); + LOG.info("{} : {}", name, src_values); + return src_values; + } + + + public static boolean recordsEqual(HCatRecord first, HCatRecord second) { + return (compareRecords(first, second) == 0); + } + + public static int compareRecords(HCatRecord first, HCatRecord second) { + return compareRecordContents(first.getAll(), second.getAll()); + } + + public static int compareRecordContents(List first, List second) { + int mySz = first.size(); + int urSz = second.size(); + if (mySz != urSz) { + return mySz - urSz; + } else { + for (int i = 0; i < first.size(); i++) { + int c = DataType.compare(first.get(i), second.get(i)); + if (c != 0) { + return c; + } + } + return 0; + } + } + + +} diff --git hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/TestDefaultHCatRecord.java hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/TestDefaultHCatRecord.java new file mode 100644 index 0000000..efbbd0e --- /dev/null +++ hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/TestDefaultHCatRecord.java @@ -0,0 +1,260 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.data; + +import java.io.DataInput; +import java.io.DataInputStream; +import java.io.DataOutput; +import java.io.DataOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.hive.hcatalog.common.HCatException; +import org.apache.hive.hcatalog.data.schema.HCatSchema; +import org.apache.hive.hcatalog.data.schema.HCatSchemaUtils; + +import junit.framework.Assert; +import junit.framework.TestCase; + +public class TestDefaultHCatRecord extends TestCase { + + public void testRYW() throws IOException { + + File f = new File("binary.dat"); + f.delete(); + f.createNewFile(); + f.deleteOnExit(); + + OutputStream fileOutStream = new FileOutputStream(f); + DataOutput outStream = new DataOutputStream(fileOutStream); + + HCatRecord[] recs = getHCatRecords(); + for (int i = 0; i < recs.length; i++) { + recs[i].write(outStream); + } + fileOutStream.flush(); + fileOutStream.close(); + + InputStream fInStream = new FileInputStream(f); + DataInput inpStream = new DataInputStream(fInStream); + + for (int i = 0; i < recs.length; i++) { + HCatRecord rec = new DefaultHCatRecord(); + rec.readFields(inpStream); + Assert.assertTrue(HCatDataCheckUtil.recordsEqual(recs[i], rec)); + } + + Assert.assertEquals(fInStream.available(), 0); + fInStream.close(); + + } + + public void 
testCompareTo() { + HCatRecord[] recs = getHCatRecords(); + Assert.assertTrue(HCatDataCheckUtil.compareRecords(recs[0], recs[1]) == 0); + Assert.assertTrue(HCatDataCheckUtil.compareRecords(recs[4], recs[5]) == 0); + } + + public void testEqualsObject() { + + HCatRecord[] recs = getHCatRecords(); + Assert.assertTrue(HCatDataCheckUtil.recordsEqual(recs[0], recs[1])); + Assert.assertTrue(HCatDataCheckUtil.recordsEqual(recs[4], recs[5])); + } + + /** + * Test get and set calls with type + * @throws HCatException + */ + public void testGetSetByType1() throws HCatException { + HCatRecord inpRec = getHCatRecords()[0]; + HCatRecord newRec = new DefaultHCatRecord(inpRec.size()); + HCatSchema hsch = + HCatSchemaUtils.getHCatSchema( + "a:tinyint,b:smallint,c:int,d:bigint,e:float,f:double,g:boolean,h:string,i:binary,j:string"); + + + newRec.setByte("a", hsch, inpRec.getByte("a", hsch)); + newRec.setShort("b", hsch, inpRec.getShort("b", hsch)); + newRec.setInteger("c", hsch, inpRec.getInteger("c", hsch)); + newRec.setLong("d", hsch, inpRec.getLong("d", hsch)); + newRec.setFloat("e", hsch, inpRec.getFloat("e", hsch)); + newRec.setDouble("f", hsch, inpRec.getDouble("f", hsch)); + newRec.setBoolean("g", hsch, inpRec.getBoolean("g", hsch)); + newRec.setString("h", hsch, inpRec.getString("h", hsch)); + newRec.setByteArray("i", hsch, inpRec.getByteArray("i", hsch)); + newRec.setString("j", hsch, inpRec.getString("j", hsch)); + + Assert.assertTrue(HCatDataCheckUtil.recordsEqual(newRec, inpRec)); + + + } + + /** + * Test get and set calls with type + * @throws HCatException + */ + public void testGetSetByType2() throws HCatException { + HCatRecord inpRec = getGetSet2InpRec(); + + HCatRecord newRec = new DefaultHCatRecord(inpRec.size()); + HCatSchema hsch = + HCatSchemaUtils.getHCatSchema("a:binary,b:map,c:array,d:struct"); + + + newRec.setByteArray("a", hsch, inpRec.getByteArray("a", hsch)); + newRec.setMap("b", hsch, inpRec.getMap("b", hsch)); + newRec.setList("c", hsch, 
inpRec.getList("c", hsch)); + newRec.setStruct("d", hsch, inpRec.getStruct("d", hsch)); + + Assert.assertTrue(HCatDataCheckUtil.recordsEqual(newRec, inpRec)); + } + + + private HCatRecord getGetSet2InpRec() { + List rlist = new ArrayList(); + + rlist.add(new byte[]{1, 2, 3}); + + Map mapcol = new HashMap(3); + mapcol.put(new Short("2"), "hcat is cool"); + mapcol.put(new Short("3"), "is it?"); + mapcol.put(new Short("4"), "or is it not?"); + rlist.add(mapcol); + + List listcol = new ArrayList(); + listcol.add(314); + listcol.add(007); + rlist.add(listcol);//list + rlist.add(listcol);//struct + return new DefaultHCatRecord(rlist); + } + + private HCatRecord[] getHCatRecords() { + + List rec_1 = new ArrayList(8); + rec_1.add(new Byte("123")); + rec_1.add(new Short("456")); + rec_1.add(new Integer(789)); + rec_1.add(new Long(1000L)); + rec_1.add(new Float(5.3F)); + rec_1.add(new Double(5.3D)); + rec_1.add(new Boolean(true)); + rec_1.add(new String("hcat and hadoop")); + rec_1.add(null); + rec_1.add("null"); + + HCatRecord tup_1 = new DefaultHCatRecord(rec_1); + + List rec_2 = new ArrayList(8); + rec_2.add(new Byte("123")); + rec_2.add(new Short("456")); + rec_2.add(new Integer(789)); + rec_2.add(new Long(1000L)); + rec_2.add(new Float(5.3F)); + rec_2.add(new Double(5.3D)); + rec_2.add(new Boolean(true)); + rec_2.add(new String("hcat and hadoop")); + rec_2.add(null); + rec_2.add("null"); + HCatRecord tup_2 = new DefaultHCatRecord(rec_2); + + List rec_3 = new ArrayList(10); + rec_3.add(new Byte("123")); + rec_3.add(new Short("456")); + rec_3.add(new Integer(789)); + rec_3.add(new Long(1000L)); + rec_3.add(new Double(5.3D)); + rec_3.add(new String("hcat and hadoop")); + rec_3.add(null); + List innerList = new ArrayList(); + innerList.add(314); + innerList.add(007); + rec_3.add(innerList); + Map map = new HashMap(3); + map.put(new Short("2"), "hcat is cool"); + map.put(new Short("3"), "is it?"); + map.put(new Short("4"), "or is it not?"); + rec_3.add(map); + + HCatRecord 
tup_3 = new DefaultHCatRecord(rec_3); + + List rec_4 = new ArrayList(8); + rec_4.add(new Byte("123")); + rec_4.add(new Short("456")); + rec_4.add(new Integer(789)); + rec_4.add(new Long(1000L)); + rec_4.add(new Double(5.3D)); + rec_4.add(new String("hcat and hadoop")); + rec_4.add(null); + rec_4.add("null"); + + Map map2 = new HashMap(3); + map2.put(new Short("2"), "hcat is cool"); + map2.put(new Short("3"), "is it?"); + map2.put(new Short("4"), "or is it not?"); + rec_4.add(map2); + List innerList2 = new ArrayList(); + innerList2.add(314); + innerList2.add(007); + rec_4.add(innerList2); + HCatRecord tup_4 = new DefaultHCatRecord(rec_4); + + + List rec_5 = new ArrayList(3); + rec_5.add(getByteArray()); + rec_5.add(getStruct()); + rec_5.add(getList()); + HCatRecord tup_5 = new DefaultHCatRecord(rec_5); + + + List rec_6 = new ArrayList(3); + rec_6.add(getByteArray()); + rec_6.add(getStruct()); + rec_6.add(getList()); + HCatRecord tup_6 = new DefaultHCatRecord(rec_6); + + + return new HCatRecord[]{tup_1, tup_2, tup_3, tup_4, tup_5, tup_6}; + + } + + private Object getList() { + return getStruct(); + } + + private Object getByteArray() { + return new byte[]{1, 2, 3, 4}; + } + + private List getStruct() { + List struct = new ArrayList(); + struct.add(new Integer(1)); + struct.add(new String("x")); + return struct; + } +} diff --git hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/TestHCatRecordSerDe.java hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/TestHCatRecordSerDe.java new file mode 100644 index 0000000..3d14a1f --- /dev/null +++ hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/TestHCatRecordSerDe.java @@ -0,0 +1,169 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.data; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Properties; + +import junit.framework.Assert; +import junit.framework.TestCase; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; +import org.apache.hadoop.io.Writable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class TestHCatRecordSerDe extends TestCase { + + private static final Logger LOG = LoggerFactory.getLogger(TestHCatRecordSerDe.class); + + public Map getData() { + Map data = new HashMap(); + + List rlist = new ArrayList(11); + rlist.add(new Byte("123")); + rlist.add(new Short("456")); + rlist.add(new Integer(789)); + rlist.add(new Long(1000L)); + rlist.add(new Double(5.3D)); + rlist.add(new Float(2.39F)); + rlist.add(new String("hcat and hadoop")); + rlist.add(null); + + List innerStruct = new ArrayList(2); + innerStruct.add(new String("abc")); + innerStruct.add(new String("def")); + rlist.add(innerStruct); + + List innerList = new ArrayList(); + innerList.add(314); + innerList.add(007); + rlist.add(innerList); + + Map map = new HashMap(3); + map.put(new Short("2"), "hcat is cool"); + map.put(new Short("3"), "is it?"); + map.put(new Short("4"), "or 
is it not?"); + rlist.add(map); + + rlist.add(new Boolean(true)); + + List c1 = new ArrayList(); + List c1_1 = new ArrayList(); + c1_1.add(new Integer(12)); + List i2 = new ArrayList(); + List ii1 = new ArrayList(); + ii1.add(new Integer(13)); + ii1.add(new Integer(14)); + i2.add(ii1); + Map> ii2 = new HashMap>(); + List iii1 = new ArrayList(); + iii1.add(new Integer(15)); + ii2.put("phew", iii1); + i2.add(ii2); + c1_1.add(i2); + c1.add(c1_1); + rlist.add(c1); + List am = new ArrayList(); + Map am_1 = new HashMap(); + am_1.put("noo", "haha"); + am.add(am_1); + rlist.add(am); + List aa = new ArrayList(); + List aa_1 = new ArrayList(); + aa_1.add("bloo"); + aa_1.add("bwahaha"); + aa.add(aa_1); + rlist.add(aa); + + String typeString = + "tinyint,smallint,int,bigint,double,float,string,string," + + "struct,array,map,boolean," + + "array,ii2:map>>>>," + + "array>,array>"; + Properties props = new Properties(); + + props.put(serdeConstants.LIST_COLUMNS, "ti,si,i,bi,d,f,s,n,r,l,m,b,c1,am,aa"); + props.put(serdeConstants.LIST_COLUMN_TYPES, typeString); +// props.put(Constants.SERIALIZATION_NULL_FORMAT, "\\N"); +// props.put(Constants.SERIALIZATION_FORMAT, "1"); + + data.put(props, new DefaultHCatRecord(rlist)); + return data; + } + + public void testRW() throws Exception { + + Configuration conf = new Configuration(); + + for (Entry e : getData().entrySet()) { + Properties tblProps = e.getKey(); + HCatRecord r = e.getValue(); + + HCatRecordSerDe hrsd = new HCatRecordSerDe(); + hrsd.initialize(conf, tblProps); + + LOG.info("ORIG: {}", r); + + Writable s = hrsd.serialize(r, hrsd.getObjectInspector()); + LOG.info("ONE: {}", s); + + HCatRecord r2 = (HCatRecord) hrsd.deserialize(s); + Assert.assertTrue(HCatDataCheckUtil.recordsEqual(r, r2)); + + // If it went through correctly, then s is also a HCatRecord, + // and also equal to the above, and a deepcopy, and this holds + // through for multiple levels more of serialization as well. 
+ + Writable s2 = hrsd.serialize(s, hrsd.getObjectInspector()); + LOG.info("TWO: {}", s2); + Assert.assertTrue(HCatDataCheckUtil.recordsEqual(r, (HCatRecord) s)); + Assert.assertTrue(HCatDataCheckUtil.recordsEqual(r, (HCatRecord) s2)); + + // serialize using another serde, and read out that object repr. + LazySimpleSerDe testSD = new LazySimpleSerDe(); + testSD.initialize(conf, tblProps); + + Writable s3 = testSD.serialize(s, hrsd.getObjectInspector()); + LOG.info("THREE: {}", s3); + Object o3 = testSD.deserialize(s3); + Assert.assertFalse(r.getClass().equals(o3.getClass())); + + // then serialize again using hrsd, and compare results + HCatRecord s4 = (HCatRecord) hrsd.serialize(o3, testSD.getObjectInspector()); + LOG.info("FOUR: {}", s4); + + // Test LazyHCatRecord init and read + LazyHCatRecord s5 = new LazyHCatRecord(o3, testSD.getObjectInspector()); + LOG.info("FIVE: {}", s5); + + LazyHCatRecord s6 = new LazyHCatRecord(s4, hrsd.getObjectInspector()); + LOG.info("SIX: {}", s6); + + } + + } + +} diff --git hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/TestJsonSerDe.java hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/TestJsonSerDe.java new file mode 100644 index 0000000..cccd3f1 --- /dev/null +++ hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/TestJsonSerDe.java @@ -0,0 +1,214 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.data; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Properties; + +import junit.framework.TestCase; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.io.Writable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class TestJsonSerDe extends TestCase { + + private static final Logger LOG = LoggerFactory.getLogger(TestJsonSerDe.class); + + public List> getData() { + List> data = new ArrayList>(); + + List rlist = new ArrayList(13); + rlist.add(new Byte("123")); + rlist.add(new Short("456")); + rlist.add(new Integer(789)); + rlist.add(new Long(1000L)); + rlist.add(new Double(5.3D)); + rlist.add(new Float(2.39F)); + rlist.add(new String("hcat and hadoop")); + rlist.add(null); + + List innerStruct = new ArrayList(2); + innerStruct.add(new String("abc")); + innerStruct.add(new String("def")); + rlist.add(innerStruct); + + List innerList = new ArrayList(); + innerList.add(314); + innerList.add(007); + rlist.add(innerList); + + Map map = new HashMap(3); + map.put(new Short("2"), "hcat is cool"); + map.put(new Short("3"), "is it?"); + map.put(new Short("4"), "or is it not?"); + rlist.add(map); + + rlist.add(new Boolean(true)); + + List c1 = new ArrayList(); + List c1_1 = new ArrayList(); + c1_1.add(new Integer(12)); + List i2 = new ArrayList(); + List ii1 = new ArrayList(); + ii1.add(new Integer(13)); + 
ii1.add(new Integer(14)); + i2.add(ii1); + Map> ii2 = new HashMap>(); + List iii1 = new ArrayList(); + iii1.add(new Integer(15)); + ii2.put("phew", iii1); + i2.add(ii2); + c1_1.add(i2); + c1.add(c1_1); + rlist.add(c1); + + List nlist = new ArrayList(13); + nlist.add(null); // tinyint + nlist.add(null); // smallint + nlist.add(null); // int + nlist.add(null); // bigint + nlist.add(null); // double + nlist.add(null); // float + nlist.add(null); // string + nlist.add(null); // string + nlist.add(null); // struct + nlist.add(null); // array + nlist.add(null); // map + nlist.add(null); // bool + nlist.add(null); // complex + + String typeString = + "tinyint,smallint,int,bigint,double,float,string,string," + + "struct,array,map,boolean," + + "array,ii2:map>>>>"; + Properties props = new Properties(); + + props.put(serdeConstants.LIST_COLUMNS, "ti,si,i,bi,d,f,s,n,r,l,m,b,c1"); + props.put(serdeConstants.LIST_COLUMN_TYPES, typeString); +// props.put(Constants.SERIALIZATION_NULL_FORMAT, "\\N"); +// props.put(Constants.SERIALIZATION_FORMAT, "1"); + + data.add(new Pair(props, new DefaultHCatRecord(rlist))); + data.add(new Pair(props, new DefaultHCatRecord(nlist))); + return data; + } + + public void testRW() throws Exception { + + Configuration conf = new Configuration(); + + for (Pair e : getData()) { + Properties tblProps = e.first; + HCatRecord r = e.second; + + HCatRecordSerDe hrsd = new HCatRecordSerDe(); + hrsd.initialize(conf, tblProps); + + JsonSerDe jsde = new JsonSerDe(); + jsde.initialize(conf, tblProps); + + LOG.info("ORIG:{}", r); + + Writable s = hrsd.serialize(r, hrsd.getObjectInspector()); + LOG.info("ONE:{}", s); + + Object o1 = hrsd.deserialize(s); + assertTrue(HCatDataCheckUtil.recordsEqual(r, (HCatRecord) o1)); + + Writable s2 = jsde.serialize(o1, hrsd.getObjectInspector()); + LOG.info("TWO:{}", s2); + Object o2 = jsde.deserialize(s2); + LOG.info("deserialized TWO : {} ", o2); + + assertTrue(HCatDataCheckUtil.recordsEqual(r, (HCatRecord) o2)); + } + + } + 
+ public void testRobustRead() throws Exception { + /** + * This test has been added to account for HCATALOG-436. + * We write out columns with "internal column names" such + * as "_col0", but try to read with regular column names. + */ + + Configuration conf = new Configuration(); + + for (Pair e : getData()) { + Properties tblProps = e.first; + HCatRecord r = e.second; + + Properties internalTblProps = new Properties(); + for (Map.Entry pe : tblProps.entrySet()) { + if (!pe.getKey().equals(serdeConstants.LIST_COLUMNS)) { + internalTblProps.put(pe.getKey(), pe.getValue()); + } else { + internalTblProps.put(pe.getKey(), getInternalNames((String) pe.getValue())); + } + } + + LOG.info("orig tbl props:{}", tblProps); + LOG.info("modif tbl props:{}", internalTblProps); + + JsonSerDe wjsd = new JsonSerDe(); + wjsd.initialize(conf, internalTblProps); + + JsonSerDe rjsd = new JsonSerDe(); + rjsd.initialize(conf, tblProps); + + LOG.info("ORIG:{}", r); + + Writable s = wjsd.serialize(r, wjsd.getObjectInspector()); + LOG.info("ONE:{}", s); + + Object o1 = wjsd.deserialize(s); + LOG.info("deserialized ONE : {} ", o1); + + Object o2 = rjsd.deserialize(s); + LOG.info("deserialized TWO : {} ", o2); + assertTrue(HCatDataCheckUtil.recordsEqual(r, (HCatRecord) o2)); + } + + } + + String getInternalNames(String columnNames) { + if (columnNames == null) { + return null; + } + if (columnNames.isEmpty()) { + return ""; + } + + StringBuffer sb = new StringBuffer(); + int numStrings = columnNames.split(",").length; + sb.append("_col0"); + for (int i = 1; i < numStrings; i++) { + sb.append(","); + sb.append(HiveConf.getColumnInternalName(i)); + } + return sb.toString(); + } +} diff --git hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/TestLazyHCatRecord.java hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/TestLazyHCatRecord.java new file mode 100644 index 0000000..6d6f35a --- /dev/null +++ hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/TestLazyHCatRecord.java 
@@ -0,0 +1,193 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.data; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hive.hcatalog.data.schema.HCatSchema; +import org.apache.hive.hcatalog.data.schema.HCatSchemaUtils; +import org.junit.Assert; +import org.junit.Test; + +public class TestLazyHCatRecord { + + private final int INT_CONST = 789; + private final long LONG_CONST = 5000000000L; + private final double DOUBLE_CONST = 3.141592654; + private final String STRING_CONST = "hello world"; + + @Test + public void testGet() throws Exception { + HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector()); + Assert.assertEquals(INT_CONST, ((Integer) r.get(0)).intValue()); + Assert.assertEquals(LONG_CONST, ((Long) r.get(1)).longValue()); + Assert.assertEquals(DOUBLE_CONST, ((Double) r.get(2)).doubleValue(), 0); + Assert.assertEquals(STRING_CONST, (String) r.get(3)); + } + + @Test + 
public void testGetWithName() throws Exception { + TypeInfo ti = getTypeInfo(); + HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector(ti)); + HCatSchema schema = HCatSchemaUtils.getHCatSchema(ti) + .get(0).getStructSubSchema(); + Assert.assertEquals(INT_CONST, ((Integer) r.get("an_int", schema)).intValue()); + Assert.assertEquals(LONG_CONST, ((Long) r.get("a_long", schema)).longValue()); + Assert.assertEquals(DOUBLE_CONST, ((Double) r.get("a_double", schema)).doubleValue(), 0); + Assert.assertEquals(STRING_CONST, (String) r.get("a_string", schema)); + } + + @Test + public void testGetAll() throws Exception { + HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector()); + List list = r.getAll(); + Assert.assertEquals(INT_CONST, ((Integer) list.get(0)).intValue()); + Assert.assertEquals(LONG_CONST, ((Long) list.get(1)).longValue()); + Assert.assertEquals(DOUBLE_CONST, ((Double) list.get(2)).doubleValue(), 0); + Assert.assertEquals(STRING_CONST, (String) list.get(3)); + } + + @Test + public void testSet() throws Exception { + HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector()); + boolean sawException = false; + try { + r.set(3, "Mary had a little lamb"); + } catch (UnsupportedOperationException uoe) { + sawException = true; + } + Assert.assertTrue(sawException); + } + + @Test + public void testSize() throws Exception { + HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector()); + Assert.assertEquals(4, r.size()); + } + + @Test + public void testReadFields() throws Exception { + HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector()); + boolean sawException = false; + try { + r.readFields(null); + } catch (UnsupportedOperationException uoe) { + sawException = true; + } + Assert.assertTrue(sawException); + } + + @Test + public void testWrite() throws Exception { + HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector()); + boolean sawException = false; + try { + 
r.write(null); + } catch (UnsupportedOperationException uoe) { + sawException = true; + } + Assert.assertTrue(sawException); + } + + @Test + public void testSetWithName() throws Exception { + HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector()); + boolean sawException = false; + try { + r.set("fred", null, "bob"); + } catch (UnsupportedOperationException uoe) { + sawException = true; + } + Assert.assertTrue(sawException); + } + + @Test + public void testRemove() throws Exception { + HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector()); + boolean sawException = false; + try { + r.remove(0); + } catch (UnsupportedOperationException uoe) { + sawException = true; + } + Assert.assertTrue(sawException); + } + + @Test + public void testCopy() throws Exception { + HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector()); + boolean sawException = false; + try { + r.copy(null); + } catch (UnsupportedOperationException uoe) { + sawException = true; + } + Assert.assertTrue(sawException); + } + + @Test + public void testGetWritable() throws Exception { + HCatRecord r = new LazyHCatRecord(getHCatRecord(), getObjectInspector()).getWritable(); + Assert.assertEquals(INT_CONST, ((Integer) r.get(0)).intValue()); + Assert.assertEquals(LONG_CONST, ((Long) r.get(1)).longValue()); + Assert.assertEquals(DOUBLE_CONST, ((Double) r.get(2)).doubleValue(), 0); + Assert.assertEquals(STRING_CONST, (String) r.get(3)); + Assert.assertEquals("org.apache.hive.hcatalog.data.DefaultHCatRecord", r.getClass().getName()); + } + + private HCatRecord getHCatRecord() throws Exception { + List rec_1 = new ArrayList(4); + rec_1.add( new Integer(INT_CONST)); + rec_1.add( new Long(LONG_CONST)); + rec_1.add( new Double(DOUBLE_CONST)); + rec_1.add( new String(STRING_CONST)); + + return new DefaultHCatRecord(rec_1); + } + + private TypeInfo getTypeInfo() throws Exception { + List names = new ArrayList(4); + names.add("an_int"); + names.add("a_long"); + 
names.add("a_double"); + names.add("a_string"); + + List tis = new ArrayList(4); + tis.add(TypeInfoFactory.getPrimitiveTypeInfo("int")); + tis.add(TypeInfoFactory.getPrimitiveTypeInfo("bigint")); + tis.add(TypeInfoFactory.getPrimitiveTypeInfo("double")); + tis.add(TypeInfoFactory.getPrimitiveTypeInfo("string")); + + return TypeInfoFactory.getStructTypeInfo(names, tis); + } + + private ObjectInspector getObjectInspector(TypeInfo ti) throws Exception { + return HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector((StructTypeInfo)ti); + } + + private ObjectInspector getObjectInspector() throws Exception { + return HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector( + (StructTypeInfo)getTypeInfo()); + } +} diff --git hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/TestReaderWriter.java hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/TestReaderWriter.java new file mode 100644 index 0000000..c1017fc --- /dev/null +++ hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/TestReaderWriter.java @@ -0,0 +1,183 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hive.hcatalog.data; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.ql.CommandNeedRetryException; +import org.apache.hadoop.mapreduce.InputSplit; +import org.apache.hive.hcatalog.common.HCatException; +import org.apache.hive.hcatalog.data.transfer.DataTransferFactory; +import org.apache.hive.hcatalog.data.transfer.HCatReader; +import org.apache.hive.hcatalog.data.transfer.HCatWriter; +import org.apache.hive.hcatalog.data.transfer.ReadEntity; +import org.apache.hive.hcatalog.data.transfer.ReaderContext; +import org.apache.hive.hcatalog.data.transfer.WriteEntity; +import org.apache.hive.hcatalog.data.transfer.WriterContext; +import org.apache.hive.hcatalog.mapreduce.HCatBaseTest; +import org.junit.Assert; +import org.junit.Test; + +public class TestReaderWriter extends HCatBaseTest { + + @Test + public void test() throws MetaException, CommandNeedRetryException, + IOException, ClassNotFoundException { + + driver.run("drop table mytbl"); + driver.run("create table mytbl (a string, b int)"); + Iterator> itr = hiveConf.iterator(); + Map map = new HashMap(); + while (itr.hasNext()) { + Entry kv = itr.next(); + map.put(kv.getKey(), kv.getValue()); + } + + WriterContext cntxt = runsInMaster(map); + + File writeCntxtFile = File.createTempFile("hcat-write", "temp"); + writeCntxtFile.deleteOnExit(); + + // Serialize context. + ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(writeCntxtFile)); + oos.writeObject(cntxt); + oos.flush(); + oos.close(); + + // Now, deserialize it. 
+ ObjectInputStream ois = new ObjectInputStream(new FileInputStream(writeCntxtFile)); + cntxt = (WriterContext) ois.readObject(); + ois.close(); + + runsInSlave(cntxt); + commit(map, true, cntxt); + + ReaderContext readCntxt = runsInMaster(map, false); + + File readCntxtFile = File.createTempFile("hcat-read", "temp"); + readCntxtFile.deleteOnExit(); + oos = new ObjectOutputStream(new FileOutputStream(readCntxtFile)); + oos.writeObject(readCntxt); + oos.flush(); + oos.close(); + + ois = new ObjectInputStream(new FileInputStream(readCntxtFile)); + readCntxt = (ReaderContext) ois.readObject(); + ois.close(); + + for (InputSplit split : readCntxt.getSplits()) { + runsInSlave(split, readCntxt.getConf()); + } + } + + private WriterContext runsInMaster(Map config) throws HCatException { + + WriteEntity.Builder builder = new WriteEntity.Builder(); + WriteEntity entity = builder.withTable("mytbl").build(); + HCatWriter writer = DataTransferFactory.getHCatWriter(entity, config); + WriterContext info = writer.prepareWrite(); + return info; + } + + private ReaderContext runsInMaster(Map config, boolean bogus) + throws HCatException { + ReadEntity entity = new ReadEntity.Builder().withTable("mytbl").build(); + HCatReader reader = DataTransferFactory.getHCatReader(entity, config); + ReaderContext cntxt = reader.prepareRead(); + return cntxt; + } + + private void runsInSlave(InputSplit split, Configuration config) throws HCatException { + + HCatReader reader = DataTransferFactory.getHCatReader(split, config); + Iterator itr = reader.read(); + int i = 1; + while (itr.hasNext()) { + HCatRecord read = itr.next(); + HCatRecord written = getRecord(i++); + // Argh, HCatRecord doesnt implement equals() + Assert.assertTrue("Read: " + read.get(0) + "Written: " + written.get(0), + written.get(0).equals(read.get(0))); + Assert.assertTrue("Read: " + read.get(1) + "Written: " + written.get(1), + written.get(1).equals(read.get(1))); + Assert.assertEquals(2, read.size()); + } + 
//Assert.assertFalse(itr.hasNext()); + } + + private void runsInSlave(WriterContext context) throws HCatException { + + HCatWriter writer = DataTransferFactory.getHCatWriter(context); + writer.write(new HCatRecordItr()); + } + + private void commit(Map config, boolean status, + WriterContext context) throws IOException { + + WriteEntity.Builder builder = new WriteEntity.Builder(); + WriteEntity entity = builder.withTable("mytbl").build(); + HCatWriter writer = DataTransferFactory.getHCatWriter(entity, config); + if (status) { + writer.commit(context); + } else { + writer.abort(context); + } + } + + private static HCatRecord getRecord(int i) { + List list = new ArrayList(2); + list.add("Row #: " + i); + list.add(i); + return new DefaultHCatRecord(list); + } + + private static class HCatRecordItr implements Iterator { + + int i = 0; + + @Override + public boolean hasNext() { + return i++ < 100 ? true : false; + } + + @Override + public HCatRecord next() { + return getRecord(i); + } + + @Override + public void remove() { + throw new RuntimeException(); + } + } +} diff --git hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/schema/TestHCatSchema.java hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/schema/TestHCatSchema.java new file mode 100644 index 0000000..5d888cd --- /dev/null +++ hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/schema/TestHCatSchema.java @@ -0,0 +1,103 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.data.schema; + +import junit.framework.TestCase; +import org.apache.hive.hcatalog.common.HCatException; + +import java.util.ArrayList; +import java.util.List; + +public class TestHCatSchema extends TestCase { + public void testCannotAddFieldMoreThanOnce() throws HCatException { + List fieldSchemaList = new ArrayList(); + fieldSchemaList.add(new HCatFieldSchema("name", HCatFieldSchema.Type.STRING, "What's your handle?")); + fieldSchemaList.add(new HCatFieldSchema("age", HCatFieldSchema.Type.INT, "So very old")); + + HCatSchema schema = new HCatSchema(fieldSchemaList); + + assertTrue(schema.getFieldNames().contains("age")); + assertEquals(2, schema.getFields().size()); + + try { + schema.append(new HCatFieldSchema("age", HCatFieldSchema.Type.INT, "So very old")); + fail("Was able to append field schema with same name"); + } catch (HCatException he) { + assertTrue(he.getMessage().contains("Attempt to append HCatFieldSchema with already existing name: age.")); + } + + assertTrue(schema.getFieldNames().contains("age")); + assertEquals(2, schema.getFields().size()); + + // Should also not be able to add fields of different types with same name + try { + schema.append(new HCatFieldSchema("age", HCatFieldSchema.Type.STRING, "Maybe spelled out?")); + fail("Was able to append field schema with same name"); + } catch (HCatException he) { + assertTrue(he.getMessage().contains("Attempt to append HCatFieldSchema with already existing name: age.")); + } + + assertTrue(schema.getFieldNames().contains("age")); + 
assertEquals(2, schema.getFields().size()); + } + + public void testHashCodeEquals() throws HCatException { + HCatFieldSchema memberID1 = new HCatFieldSchema("memberID", HCatFieldSchema.Type.INT, "as a number"); + HCatFieldSchema memberID2 = new HCatFieldSchema("memberID", HCatFieldSchema.Type.INT, "as a number"); + assertTrue("Expected objects to be equal", memberID1.equals(memberID2)); + assertTrue("Expected hash codes to be equal", memberID1.hashCode() == memberID2.hashCode()); + } + + public void testCannotInstantiateSchemaWithRepeatedFieldNames() throws HCatException { + List fieldSchemaList = new ArrayList(); + + fieldSchemaList.add(new HCatFieldSchema("memberID", HCatFieldSchema.Type.INT, "as a number")); + fieldSchemaList.add(new HCatFieldSchema("location", HCatFieldSchema.Type.STRING, "there's Waldo")); + + // No duplicate names. This should be ok + HCatSchema schema = new HCatSchema(fieldSchemaList); + + fieldSchemaList.add(new HCatFieldSchema("memberID", HCatFieldSchema.Type.STRING, "as a String")); + + // Now a duplicated field name. 
Should fail + try { + HCatSchema schema2 = new HCatSchema(fieldSchemaList); + fail("Able to add duplicate field name"); + } catch (IllegalArgumentException iae) { + assertTrue(iae.getMessage().contains("Field named memberID already exists")); + } + } + public void testRemoveAddField() throws HCatException { + List fieldSchemaList = new ArrayList(); + + fieldSchemaList.add(new HCatFieldSchema("memberID", HCatFieldSchema.Type.INT, "as a number")); + HCatFieldSchema locationField = new HCatFieldSchema("location", HCatFieldSchema.Type.STRING, "there's Waldo"); + fieldSchemaList.add(locationField); + HCatSchema schema = new HCatSchema(fieldSchemaList); + schema.remove(locationField); + Integer position = schema.getPosition(locationField.getName()); + assertTrue("position is not null after remove" , position == null); + try { + schema.append(locationField); + } + catch (HCatException ex) { + assertFalse(ex.getMessage(), true); + } + } +} diff --git hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/schema/TestHCatSchemaUtils.java hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/schema/TestHCatSchemaUtils.java new file mode 100644 index 0000000..bc19f89 --- /dev/null +++ hcatalog/core/src/test/java/org/apache/hive/hcatalog/data/schema/TestHCatSchemaUtils.java @@ -0,0 +1,82 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.data.schema; + +import java.io.PrintStream; + +import junit.framework.TestCase; + +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hive.hcatalog.common.HCatException; +import org.apache.hive.hcatalog.data.schema.HCatFieldSchema.Category; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class TestHCatSchemaUtils extends TestCase { + + private static final Logger LOG = LoggerFactory.getLogger(TestHCatSchemaUtils.class); + + public void testSimpleOperation() throws Exception { + String typeString = "struct," + + "currently_registered_courses:array," + + "current_grades:map," + + "phnos:array>,blah:array>"; + + TypeInfo ti = TypeInfoUtils.getTypeInfoFromTypeString(typeString); + + HCatSchema hsch = HCatSchemaUtils.getHCatSchemaFromTypeString(typeString); + LOG.info("Type name : {}", ti.getTypeName()); + LOG.info("HCatSchema : {}", hsch); + assertEquals(hsch.size(), 1); + assertEquals(ti.getTypeName(), hsch.get(0).getTypeString()); + assertEquals(hsch.get(0).getTypeString(), typeString); + } + + @SuppressWarnings("unused") + private void pretty_print(PrintStream pout, HCatSchema hsch) throws HCatException { + pretty_print(pout, hsch, ""); + } + + + private void pretty_print(PrintStream pout, HCatSchema hsch, String prefix) throws HCatException { + int i = 0; + for (HCatFieldSchema field : hsch.getFields()) { + pretty_print(pout, field, prefix + "." + (field.getName() == null ? 
i : field.getName())); + i++; + } + } + + private void pretty_print(PrintStream pout, HCatFieldSchema hfsch, String prefix) throws HCatException { + + Category tcat = hfsch.getCategory(); + if (Category.STRUCT == tcat) { + pretty_print(pout, hfsch.getStructSubSchema(), prefix); + } else if (Category.ARRAY == tcat) { + pretty_print(pout, hfsch.getArrayElementSchema(), prefix); + } else if (Category.MAP == tcat) { + pout.println(prefix + ".mapkey:\t" + hfsch.getMapKeyType().toString()); + pretty_print(pout, hfsch.getMapValueSchema(), prefix + ".mapvalue:"); + } else { + pout.println(prefix + "\t" + hfsch.getType().toString()); + } + } + +} diff --git hcatalog/core/src/test/java/org/apache/hive/hcatalog/fileformats/TestOrcDynamicPartitioned.java hcatalog/core/src/test/java/org/apache/hive/hcatalog/fileformats/TestOrcDynamicPartitioned.java new file mode 100644 index 0000000..551b471 --- /dev/null +++ hcatalog/core/src/test/java/org/apache/hive/hcatalog/fileformats/TestOrcDynamicPartitioned.java @@ -0,0 +1,52 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hive.hcatalog.fileformats; + +import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; +import org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat; +import org.apache.hadoop.hive.ql.io.orc.OrcSerde; +import org.apache.hive.hcatalog.mapreduce.TestHCatDynamicPartitioned; +import org.junit.BeforeClass; + +public class TestOrcDynamicPartitioned extends TestHCatDynamicPartitioned { + + @BeforeClass + public static void generateInputData() throws Exception { + tableName = "testOrcDynamicPartitionedTable"; + generateWriteRecords(NUM_RECORDS, NUM_PARTITIONS, 0); + generateDataColumns(); + } + + @Override + protected String inputFormat() { + return OrcInputFormat.class.getName(); + } + + @Override + protected String outputFormat() { + return OrcOutputFormat.class.getName(); + } + + @Override + protected String serdeClass() { + return OrcSerde.class.getName(); + } + +} diff --git hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapred/TestHiveHCatInputFormat.java.broken hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapred/TestHiveHCatInputFormat.java.broken new file mode 100644 index 0000000..082d723 --- /dev/null +++ hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapred/TestHiveHCatInputFormat.java.broken @@ -0,0 +1,193 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hcatalog.mapred; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Properties; + +import junit.framework.TestCase; + +import org.apache.hadoop.hive.ql.CommandNeedRetryException; +import org.apache.hadoop.hive.ql.Driver; +import org.apache.hadoop.hive.ql.io.RCFileInputFormat; +import org.apache.hadoop.hive.ql.io.RCFileOutputFormat; +import org.apache.hcatalog.MiniCluster; +import org.apache.hcatalog.data.HCatDataCheckUtil; +import org.apache.hcatalog.mapred.HCatMapredInputFormat; +import org.apache.hcatalog.mapreduce.HCatInputFormat; +import org.apache.hcatalog.storagehandler.HCatStorageHandlerImpl; +import org.apache.pig.ExecType; +import org.apache.pig.PigServer; +import org.apache.pig.impl.util.UDFContext; + +public class TestHiveHCatInputFormat extends TestCase { + private static MiniCluster cluster = MiniCluster.buildCluster(); + private static Driver driver; + + String PTNED_TABLE = "junit_testhiveinputintegration_ptni"; + String UNPTNED_TABLE = "junit_testhiveinputintegration_noptn"; + String basicFile = "/tmp/"+PTNED_TABLE+".file"; + + public void testFromHive() throws Exception { + if (driver == null){ + driver = HCatDataCheckUtil.instantiateDriver(cluster); + } + + Properties props = new Properties(); + props.setProperty("fs.default.name", cluster.getProperties().getProperty("fs.default.name")); + String basicFileFullName = cluster.getProperties().getProperty("fs.default.name") + basicFile; + + cleanup(); + + // create source data file + HCatDataCheckUtil.generateDataFile(cluster,basicFile); + + String createPtnedTable = "(j int, s string) partitioned by (i int) " + +"stored by '"+HCatStorageHandlerImpl.class.getName()+"' tblproperties" + + "('hcat.isd'='org.apache.hcatalog.rcfile.RCFileInputDriver'," + + 
"'hcat.osd'='org.apache.hcatalog.rcfile.RCFileOutputDriver') "; + + HCatDataCheckUtil.createTable(driver,PTNED_TABLE,createPtnedTable); + + String createUnptnedTable = "(i int, j int, s string) " + +"stored by '"+HCatStorageHandlerImpl.class.getName()+"' tblproperties" + + "('hcat.isd'='org.apache.hcatalog.rcfile.RCFileInputDriver'," + + "'hcat.osd'='org.apache.hcatalog.rcfile.RCFileOutputDriver') "; + + HCatDataCheckUtil.createTable(driver,UNPTNED_TABLE,createUnptnedTable); + + + driver.run("describe extended "+UNPTNED_TABLE); + ArrayList des_values = new ArrayList(); + driver.getResults(des_values); + for (String s : des_values){ + System.err.println("du:"+s); + } + + driver.run("describe extended "+PTNED_TABLE); + ArrayList des2_values = new ArrayList(); + driver.getResults(des2_values); + for (String s : des2_values){ + System.err.println("dp:"+s); + } + + // use pig to read from source file and put into this table + + PigServer server = new PigServer(ExecType.LOCAL, props); + UDFContext.getUDFContext().setClientSystemProps(); + server.setBatchOn(); + server.registerQuery("A = load '"+basicFileFullName+"' as (i:int, j:int, s:chararray);"); + server.registerQuery("store A into '"+UNPTNED_TABLE+"' using org.apache.hcatalog.pig.HCatStorer();"); + server.executeBatch(); + + server.setBatchOn(); + server.registerQuery("A = load '"+basicFileFullName+"' as (i:int, j:int, s:chararray);"); + server.registerQuery("store A into '"+PTNED_TABLE+"' using org.apache.hcatalog.pig.HCatStorer();"); + server.executeBatch(); + + // partitioned by i + // select * from tbl; + // select j,s,i from tbl; + // select * from tbl where i = 3; + // select j,s,i from tbl where i = 3; + // select * from tbl where j = 3; + // select j,s,i from tbl where j = 3; + + ArrayList p_select_star_nofilter = HCatDataCheckUtil.formattedRun(driver, + "p_select_star_nofilter","select * from "+PTNED_TABLE); + ArrayList p_select_named_nofilter = HCatDataCheckUtil.formattedRun(driver, + 
"p_select_named_nofilter","select j,s,i from "+PTNED_TABLE); + + assertDataIdentical(p_select_star_nofilter,p_select_named_nofilter,50); + + ArrayList p_select_star_ptnfilter = HCatDataCheckUtil.formattedRun(driver, + "p_select_star_ptnfilter","select * from "+PTNED_TABLE+" where i = 3"); + ArrayList p_select_named_ptnfilter = HCatDataCheckUtil.formattedRun(driver, + "p_select_named_ptnfilter","select j,s,i from "+PTNED_TABLE+" where i = 3"); + + assertDataIdentical(p_select_star_ptnfilter,p_select_named_ptnfilter,10); + + ArrayList select_star_nonptnfilter = HCatDataCheckUtil.formattedRun(driver, + "select_star_nonptnfilter","select * from "+PTNED_TABLE+" where j = 28"); + ArrayList select_named_nonptnfilter = HCatDataCheckUtil.formattedRun(driver, + "select_named_nonptnfilter","select j,s,i from "+PTNED_TABLE+" where j = 28"); + + assertDataIdentical(select_star_nonptnfilter,select_named_nonptnfilter,1); + + // non-partitioned + // select * from tbl; + // select i,j,s from tbl; + // select * from tbl where i = 3; + // select i,j,s from tbl where i = 3; + + // select j,s,i from tbl; + // select j,s,i from tbl where i = 3; + + ArrayList select_star_nofilter = HCatDataCheckUtil.formattedRun(driver, + "select_star_nofilter","select * from "+UNPTNED_TABLE); //i,j,s select * order is diff for unptn + ArrayList select_ijs_nofilter = HCatDataCheckUtil.formattedRun(driver, + "select_ijs_nofilter","select i,j,s from "+UNPTNED_TABLE); + + assertDataIdentical(select_star_nofilter,select_ijs_nofilter,50); + + ArrayList select_star_ptnfilter = HCatDataCheckUtil.formattedRun(driver, + "select_star_ptnfilter","select * from "+UNPTNED_TABLE+" where i = 3"); //i,j,s + ArrayList select_ijs_ptnfilter = HCatDataCheckUtil.formattedRun(driver, + "select_ijs_ptnfilter","select i,j,s from "+UNPTNED_TABLE+" where i = 3"); + + assertDataIdentical(select_star_ptnfilter,select_ijs_ptnfilter,10); + + ArrayList select_jsi_nofilter = HCatDataCheckUtil.formattedRun(driver, + 
"select_jsi_nofilter","select j,s,i from "+UNPTNED_TABLE); + assertDataIdentical(p_select_named_nofilter,select_jsi_nofilter,50,true); + + ArrayList select_jsi_ptnfilter = HCatDataCheckUtil.formattedRun(driver, + "select_jsi_ptnfilter","select j,s,i from "+UNPTNED_TABLE+" where i = 3"); + assertDataIdentical(p_select_named_ptnfilter,select_jsi_ptnfilter,10,true); + + } + + private void assertDataIdentical(ArrayList result1, + ArrayList result2, int numRecords) { + assertDataIdentical(result1,result2,numRecords,false); + } + + private void assertDataIdentical(ArrayList result1, + ArrayList result2, int numRecords,boolean doSort) { + assertEquals(numRecords, result1.size()); + assertEquals(numRecords, result2.size()); + Collections.sort(result1); + Collections.sort(result2); + for (int i = 0; i < numRecords; i++){ + assertEquals(result1.get(i),result2.get(i)); + } + } + + + private void cleanup() throws IOException, CommandNeedRetryException { + MiniCluster.deleteFile(cluster, basicFile); + HCatDataCheckUtil.dropTable(driver,PTNED_TABLE); + HCatDataCheckUtil.dropTable(driver,UNPTNED_TABLE); + } + +} diff --git hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatBaseTest.java hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatBaseTest.java new file mode 100644 index 0000000..8caa916 --- /dev/null +++ hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatBaseTest.java @@ -0,0 +1,86 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.mapreduce; + +import org.apache.hadoop.fs.FileUtil; +import org.apache.hadoop.hive.cli.CliSessionState; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; +import org.apache.hadoop.hive.ql.Driver; +import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.pig.PigServer; +import org.junit.Assert; +import org.junit.Before; +import org.junit.BeforeClass; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.IOException; + +/** + * Simplify writing HCatalog tests that require a HiveMetaStore. 
+ */ +public class HCatBaseTest { + protected static final Logger LOG = LoggerFactory.getLogger(HCatBaseTest.class); + protected static final String TEST_DATA_DIR = + "/tmp/build/test/data/" + HCatBaseTest.class.getCanonicalName(); + protected static final String TEST_WAREHOUSE_DIR = TEST_DATA_DIR + "/warehouse"; + + protected HiveConf hiveConf = null; + protected Driver driver = null; + protected HiveMetaStoreClient client = null; + + @BeforeClass + public static void setUpTestDataDir() throws Exception { + LOG.info("Using warehouse directory " + TEST_WAREHOUSE_DIR); + File f = new File(TEST_WAREHOUSE_DIR); + if (f.exists()) { + FileUtil.fullyDelete(f); + } + Assert.assertTrue(new File(TEST_WAREHOUSE_DIR).mkdirs()); + } + + @Before + public void setUp() throws Exception { + if (driver == null) { + setUpHiveConf(); + driver = new Driver(hiveConf); + client = new HiveMetaStoreClient(hiveConf); + SessionState.start(new CliSessionState(hiveConf)); + } + } + + /** + * Create a new HiveConf and set properties necessary for unit tests. 
+ */ + protected void setUpHiveConf() { + hiveConf = new HiveConf(this.getClass()); + hiveConf.setVar(HiveConf.ConfVars.PREEXECHOOKS, ""); + hiveConf.setVar(HiveConf.ConfVars.POSTEXECHOOKS, ""); + hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY, false); + hiveConf.setVar(HiveConf.ConfVars.METASTOREWAREHOUSE, TEST_WAREHOUSE_DIR); + } + + protected void logAndRegister(PigServer server, String query) throws IOException { + LOG.info("Registering pig query: " + query); + server.registerQuery(query); + } +} diff --git hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatMapReduceTest.java hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatMapReduceTest.java new file mode 100644 index 0000000..e71b672 --- /dev/null +++ hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/HCatMapReduceTest.java @@ -0,0 +1,383 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hive.hcatalog.mapreduce; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import junit.framework.Assert; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocalFileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.MetaStoreUtils; +import org.apache.hadoop.hive.metastore.TableType; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.SerDeInfo; +import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.ql.io.RCFileInputFormat; +import org.apache.hadoop.hive.ql.io.RCFileOutputFormat; +import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.WritableComparable; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.JobStatus; +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; +import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; +import org.apache.hive.hcatalog.common.HCatConstants; +import org.apache.hive.hcatalog.common.HCatUtil; +import org.apache.hive.hcatalog.data.DefaultHCatRecord; +import org.apache.hive.hcatalog.data.HCatRecord; +import org.apache.hive.hcatalog.data.schema.HCatFieldSchema; +import org.apache.hive.hcatalog.data.schema.HCatSchema; +import org.junit.After; +import org.junit.Before; +import org.junit.BeforeClass; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import static 
org.junit.Assert.assertTrue; + +/** + * Test for HCatOutputFormat. Writes a partition using HCatOutputFormat and reads + * it back using HCatInputFormat, checks the column values and counts. + */ +public abstract class HCatMapReduceTest extends HCatBaseTest { + + private static final Logger LOG = LoggerFactory.getLogger(HCatMapReduceTest.class); + protected static String dbName = MetaStoreUtils.DEFAULT_DATABASE_NAME; + protected static String tableName = "testHCatMapReduceTable"; + + private static List writeRecords = new ArrayList(); + private static List readRecords = new ArrayList(); + + protected abstract List getPartitionKeys(); + + protected abstract List getTableColumns(); + + private static FileSystem fs; + + protected Boolean isTableExternal() { + return false; + } + + protected String inputFormat() { + return RCFileInputFormat.class.getName(); + } + + protected String outputFormat() { + return RCFileOutputFormat.class.getName(); + } + + protected String serdeClass() { + return ColumnarSerDe.class.getName(); + } + + @BeforeClass + public static void setUpOneTime() throws Exception { + fs = new LocalFileSystem(); + fs.initialize(fs.getWorkingDirectory().toUri(), new Configuration()); + + HiveConf hiveConf = new HiveConf(); + hiveConf.setInt(HCatConstants.HCAT_HIVE_CLIENT_EXPIRY_TIME, 0); + // Hack to initialize cache with 0 expiry time causing it to return a new hive client every time + // Otherwise the cache doesn't play well with the second test method with the client gets closed() in the + // tearDown() of the previous test + HCatUtil.getHiveClient(hiveConf); + + MapCreate.writeCount = 0; + MapRead.readCount = 0; + } + + @After + public void deleteTable() throws Exception { + try { + String databaseName = (dbName == null) ? 
MetaStoreUtils.DEFAULT_DATABASE_NAME : dbName; + + client.dropTable(databaseName, tableName); + } catch (Exception e) { + e.printStackTrace(); + throw e; + } + } + + @Before + public void createTable() throws Exception { + String databaseName = (dbName == null) ? MetaStoreUtils.DEFAULT_DATABASE_NAME : dbName; + + try { + client.dropTable(databaseName, tableName); + } catch (Exception e) { + } //can fail with NoSuchObjectException + + + Table tbl = new Table(); + tbl.setDbName(databaseName); + tbl.setTableName(tableName); + if (isTableExternal()){ + tbl.setTableType(TableType.EXTERNAL_TABLE.toString()); + } else { + tbl.setTableType(TableType.MANAGED_TABLE.toString()); + } + StorageDescriptor sd = new StorageDescriptor(); + + sd.setCols(getTableColumns()); + tbl.setPartitionKeys(getPartitionKeys()); + + tbl.setSd(sd); + + sd.setBucketCols(new ArrayList(2)); + sd.setSerdeInfo(new SerDeInfo()); + sd.getSerdeInfo().setName(tbl.getTableName()); + sd.getSerdeInfo().setParameters(new HashMap()); + sd.getSerdeInfo().getParameters().put(serdeConstants.SERIALIZATION_FORMAT, "1"); + if (isTableExternal()){ + sd.getSerdeInfo().getParameters().put("EXTERNAL", "TRUE"); + } + sd.getSerdeInfo().setSerializationLib(serdeClass()); + sd.setInputFormat(inputFormat()); + sd.setOutputFormat(outputFormat()); + + Map tableParams = new HashMap(); + tbl.setParameters(tableParams); + + client.createTable(tbl); + } + + //Create test input file with specified number of rows + private void createInputFile(Path path, int rowCount) throws IOException { + + if (fs.exists(path)) { + fs.delete(path, true); + } + + FSDataOutputStream os = fs.create(path); + + for (int i = 0; i < rowCount; i++) { + os.writeChars(i + "\n"); + } + + os.close(); + } + + public static class MapCreate extends + Mapper { + + static int writeCount = 0; //test will be in local mode + + @Override + public void map(LongWritable key, Text value, Context context + ) throws IOException, InterruptedException { + { + try { + 
HCatRecord rec = writeRecords.get(writeCount); + context.write(null, rec); + writeCount++; + + } catch (Exception e) { + + e.printStackTrace(System.err); //print since otherwise exception is lost + throw new IOException(e); + } + } + } + } + + public static class MapRead extends + Mapper { + + static int readCount = 0; //test will be in local mode + + @Override + public void map(WritableComparable key, HCatRecord value, Context context + ) throws IOException, InterruptedException { + { + try { + readRecords.add(value); + readCount++; + } catch (Exception e) { + e.printStackTrace(); //print since otherwise exception is lost + throw new IOException(e); + } + } + } + } + + Job runMRCreate(Map partitionValues, + List partitionColumns, List records, + int writeCount, boolean assertWrite) throws Exception { + return runMRCreate(partitionValues, partitionColumns, records, writeCount, assertWrite, true); + } + + /** + * Run a local map reduce job to load data from in memory records to an HCatalog Table + * @param partitionValues + * @param partitionColumns + * @param records data to be written to HCatalog table + * @param writeCount + * @param assertWrite + * @param asSingleMapTask + * @return + * @throws Exception + */ + Job runMRCreate(Map partitionValues, + List partitionColumns, List records, + int writeCount, boolean assertWrite, boolean asSingleMapTask) throws Exception { + + writeRecords = records; + MapCreate.writeCount = 0; + + Configuration conf = new Configuration(); + Job job = new Job(conf, "hcat mapreduce write test"); + job.setJarByClass(this.getClass()); + job.setMapperClass(HCatMapReduceTest.MapCreate.class); + + // input/output settings + job.setInputFormatClass(TextInputFormat.class); + + if (asSingleMapTask) { + // One input path would mean only one map task + Path path = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceInput"); + createInputFile(path, writeCount); + TextInputFormat.setInputPaths(job, path); + } else { + // Create two input 
paths so that two map tasks get triggered. There could be other ways + // to trigger two map tasks. + Path path = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceInput"); + createInputFile(path, writeCount / 2); + + Path path2 = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceInput2"); + createInputFile(path2, (writeCount - writeCount / 2)); + + TextInputFormat.setInputPaths(job, path, path2); + } + + job.setOutputFormatClass(HCatOutputFormat.class); + + OutputJobInfo outputJobInfo = OutputJobInfo.create(dbName, tableName, partitionValues); + HCatOutputFormat.setOutput(job, outputJobInfo); + + job.setMapOutputKeyClass(BytesWritable.class); + job.setMapOutputValueClass(DefaultHCatRecord.class); + + job.setNumReduceTasks(0); + + HCatOutputFormat.setSchema(job, new HCatSchema(partitionColumns)); + + boolean success = job.waitForCompletion(true); + + // Ensure counters are set when data has actually been read. + if (partitionValues != null) { + assertTrue(job.getCounters().getGroup("FileSystemCounters") + .findCounter("FILE_BYTES_READ").getValue() > 0); + } + + if (!HCatUtil.isHadoop23()) { + // Local mode outputcommitter hook is not invoked in Hadoop 1.x + if (success) { + new FileOutputCommitterContainer(job, null).commitJob(job); + } else { + new FileOutputCommitterContainer(job, null).abortJob(job, JobStatus.State.FAILED); + } + } + if (assertWrite) { + // we assert only if we expected to assert with this call. 
+ Assert.assertEquals(writeCount, MapCreate.writeCount); + } + + return job; + } + + List runMRRead(int readCount) throws Exception { + return runMRRead(readCount, null); + } + + /** + * Run a local map reduce job to read records from HCatalog table and verify if the count is as expected + * @param readCount + * @param filter + * @return + * @throws Exception + */ + List runMRRead(int readCount, String filter) throws Exception { + + MapRead.readCount = 0; + readRecords.clear(); + + Configuration conf = new Configuration(); + Job job = new Job(conf, "hcat mapreduce read test"); + job.setJarByClass(this.getClass()); + job.setMapperClass(HCatMapReduceTest.MapRead.class); + + // input/output settings + job.setInputFormatClass(HCatInputFormat.class); + job.setOutputFormatClass(TextOutputFormat.class); + + HCatInputFormat.setInput(job, dbName, tableName).setFilter(filter); + + job.setMapOutputKeyClass(BytesWritable.class); + job.setMapOutputValueClass(Text.class); + + job.setNumReduceTasks(0); + + Path path = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceOutput"); + if (fs.exists(path)) { + fs.delete(path, true); + } + + TextOutputFormat.setOutputPath(job, path); + + job.waitForCompletion(true); + Assert.assertEquals(readCount, MapRead.readCount); + + return readRecords; + } + + + protected HCatSchema getTableSchema() throws Exception { + + Configuration conf = new Configuration(); + Job job = new Job(conf, "hcat mapreduce read schema test"); + job.setJarByClass(this.getClass()); + + // input/output settings + job.setInputFormatClass(HCatInputFormat.class); + job.setOutputFormatClass(TextOutputFormat.class); + + HCatInputFormat.setInput(job, dbName, tableName); + + return HCatInputFormat.getTableSchema(job); + } + +} + + + diff --git hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatDynamicPartitioned.java hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatDynamicPartitioned.java new file mode 100644 index 
0000000..ab6ff14 --- /dev/null +++ hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatDynamicPartitioned.java @@ -0,0 +1,208 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.mapreduce; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import junit.framework.Assert; + +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hive.hcatalog.common.HCatUtil; +import org.apache.hive.hcatalog.common.ErrorType; +import org.apache.hive.hcatalog.common.HCatConstants; +import org.apache.hive.hcatalog.common.HCatException; +import org.apache.hive.hcatalog.data.DefaultHCatRecord; +import org.apache.hive.hcatalog.data.HCatRecord; +import org.apache.hive.hcatalog.data.schema.HCatFieldSchema; +import org.apache.hive.hcatalog.data.schema.HCatSchemaUtils; +import org.junit.BeforeClass; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import static junit.framework.Assert.assertEquals; +import static junit.framework.Assert.assertTrue; + 
+public class TestHCatDynamicPartitioned extends HCatMapReduceTest { + + private static List writeRecords; + private static List dataColumns; + private static final Logger LOG = LoggerFactory.getLogger(TestHCatDynamicPartitioned.class); + protected static final int NUM_RECORDS = 20; + protected static final int NUM_PARTITIONS = 5; + + @BeforeClass + public static void generateInputData() throws Exception { + tableName = "testHCatDynamicPartitionedTable"; + generateWriteRecords(NUM_RECORDS, NUM_PARTITIONS, 0); + generateDataColumns(); + } + + protected static void generateDataColumns() throws HCatException { + dataColumns = new ArrayList(); + dataColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, ""))); + dataColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, ""))); + dataColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("p1", serdeConstants.STRING_TYPE_NAME, ""))); + } + + protected static void generateWriteRecords(int max, int mod, int offset) { + writeRecords = new ArrayList(); + + for (int i = 0; i < max; i++) { + List objList = new ArrayList(); + + objList.add(i); + objList.add("strvalue" + i); + objList.add(String.valueOf((i % mod) + offset)); + writeRecords.add(new DefaultHCatRecord(objList)); + } + } + + @Override + protected List getPartitionKeys() { + List fields = new ArrayList(); + fields.add(new FieldSchema("p1", serdeConstants.STRING_TYPE_NAME, "")); + return fields; + } + + @Override + protected List getTableColumns() { + List fields = new ArrayList(); + fields.add(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")); + fields.add(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "")); + return fields; + } + + /** + * Run the dynamic partitioning test but with single map task + * @throws Exception + */ + @Test + public void testHCatDynamicPartitionedTable() throws Exception { + runHCatDynamicPartitionedTable(true); + } + + /** + * 
Run the dynamic partitioning test but with multiple map task. See HCATALOG-490 + * @throws Exception + */ + @Test + public void testHCatDynamicPartitionedTableMultipleTask() throws Exception { + runHCatDynamicPartitionedTable(false); + } + + protected void runHCatDynamicPartitionedTable(boolean asSingleMapTask) throws Exception { + generateWriteRecords(NUM_RECORDS, NUM_PARTITIONS, 0); + runMRCreate(null, dataColumns, writeRecords, NUM_RECORDS, true, asSingleMapTask); + + runMRRead(NUM_RECORDS); + + //Read with partition filter + runMRRead(4, "p1 = \"0\""); + runMRRead(8, "p1 = \"1\" or p1 = \"3\""); + runMRRead(4, "p1 = \"4\""); + + // read from hive to test + + String query = "select * from " + tableName; + int retCode = driver.run(query).getResponseCode(); + + if (retCode != 0) { + throw new Exception("Error " + retCode + " running query " + query); + } + + ArrayList res = new ArrayList(); + driver.getResults(res); + assertEquals(NUM_RECORDS, res.size()); + + + //Test for duplicate publish + IOException exc = null; + try { + generateWriteRecords(NUM_RECORDS, NUM_PARTITIONS, 0); + Job job = runMRCreate(null, dataColumns, writeRecords, NUM_RECORDS, false); + + if (HCatUtil.isHadoop23()) { + Assert.assertTrue(job.isSuccessful()==false); + } + } catch (IOException e) { + exc = e; + } + + if (!HCatUtil.isHadoop23()) { + assertTrue(exc != null); + assertTrue(exc instanceof HCatException); + assertTrue("Got exception of type [" + ((HCatException) exc).getErrorType().toString() + + "] Expected ERROR_PUBLISHING_PARTITION or ERROR_MOVE_FAILED", + (ErrorType.ERROR_PUBLISHING_PARTITION == ((HCatException) exc).getErrorType()) + || (ErrorType.ERROR_MOVE_FAILED == ((HCatException) exc).getErrorType()) + ); + } + + query = "show partitions " + tableName; + retCode = driver.run(query).getResponseCode(); + if (retCode != 0) { + throw new Exception("Error " + retCode + " running query " + query); + } + res = new ArrayList(); + driver.getResults(res); + assertEquals(NUM_PARTITIONS, 
res.size()); + + query = "select * from " + tableName; + retCode = driver.run(query).getResponseCode(); + if (retCode != 0) { + throw new Exception("Error " + retCode + " running query " + query); + } + res = new ArrayList(); + driver.getResults(res); + assertEquals(NUM_RECORDS, res.size()); + } + + //TODO 1.0 miniCluster is slow this test times out, make it work +// renaming test to make test framework skip it + public void _testHCatDynamicPartitionMaxPartitions() throws Exception { + HiveConf hc = new HiveConf(this.getClass()); + + int maxParts = hiveConf.getIntVar(HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTS); + LOG.info("Max partitions allowed = {}", maxParts); + + IOException exc = null; + try { + generateWriteRecords(maxParts + 5, maxParts + 2, 10); + runMRCreate(null, dataColumns, writeRecords, maxParts + 5, false); + } catch (IOException e) { + exc = e; + } + + if (HCatConstants.HCAT_IS_DYNAMIC_MAX_PTN_CHECK_ENABLED) { + assertTrue(exc != null); + assertTrue(exc instanceof HCatException); + assertEquals(ErrorType.ERROR_TOO_MANY_DYNAMIC_PTNS, ((HCatException) exc).getErrorType()); + } else { + assertTrue(exc == null); + runMRRead(maxParts + 5); + } + } +} diff --git hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatEximInputFormat.java.broken hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatEximInputFormat.java.broken new file mode 100644 index 0000000..8b3e089 --- /dev/null +++ hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatEximInputFormat.java.broken @@ -0,0 +1,429 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hcatalog.mapreduce; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.TreeMap; + +import junit.framework.TestCase; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocalFileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.serde.Constants; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; +import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; +import org.apache.hcatalog.data.DefaultHCatRecord; +import org.apache.hcatalog.data.HCatRecord; +import org.apache.hcatalog.data.schema.HCatFieldSchema; +import org.apache.hcatalog.data.schema.HCatSchema; +import org.apache.hcatalog.data.schema.HCatSchemaUtils; +import org.apache.hcatalog.mapreduce.TestHCatEximInputFormat.TestImport.EmpDetails; + +/** + * + * TestHCatEximInputFormat. tests primarily HCatEximInputFormat but + * also HCatEximOutputFormat. 
+ * + */ +public class TestHCatEximInputFormat extends TestCase { + + public static class TestExport extends + org.apache.hadoop.mapreduce.Mapper { + + private HCatSchema recordSchema; + + @Override + protected void setup(Context context) throws IOException, + InterruptedException { + super.setup(context); + recordSchema = HCatEximOutputFormat.getTableSchema(context); + } + + @Override + public void map(LongWritable key, Text value, Context context) + throws IOException, InterruptedException { + String[] cols = value.toString().split(","); + HCatRecord record = new DefaultHCatRecord(recordSchema.size()); + record.setInteger("emp_id", recordSchema, Integer.parseInt(cols[0])); + record.setString("emp_name", recordSchema, cols[1]); + record.setString("emp_dob", recordSchema, cols[2]); + record.setString("emp_sex", recordSchema, cols[3]); + context.write(key, record); + } + } + + public static class TestImport extends + org.apache.hadoop.mapreduce.Mapper< + org.apache.hadoop.io.LongWritable, HCatRecord, + org.apache.hadoop.io.Text, + org.apache.hadoop.io.Text> { + + private HCatSchema recordSchema; + + public static class EmpDetails { + public String emp_name; + public String emp_dob; + public String emp_sex; + public String emp_country; + public String emp_state; + } + + public static Map empRecords = new TreeMap(); + + @Override + protected void setup(Context context) throws IOException, + InterruptedException { + super.setup(context); + try { + recordSchema = HCatBaseInputFormat.getOutputSchema(context); + } catch (Exception e) { + throw new IOException("Error getting outputschema from job configuration", e); + } + System.out.println("RecordSchema : " + recordSchema.toString()); + } + + @Override + public void map(LongWritable key, HCatRecord value, Context context) + throws IOException, InterruptedException { + EmpDetails empDetails = new EmpDetails(); + Integer emp_id = value.getInteger("emp_id", recordSchema); + String emp_name = value.getString("emp_name", 
recordSchema); + empDetails.emp_name = emp_name; + if (recordSchema.getPosition("emp_dob") != null) { + empDetails.emp_dob = value.getString("emp_dob", recordSchema); + } + if (recordSchema.getPosition("emp_sex") != null) { + empDetails.emp_sex = value.getString("emp_sex", recordSchema); + } + if (recordSchema.getPosition("emp_country") != null) { + empDetails.emp_country = value.getString("emp_country", recordSchema); + } + if (recordSchema.getPosition("emp_state") != null) { + empDetails.emp_state = value.getString("emp_state", recordSchema); + } + empRecords.put(emp_id, empDetails); + } + } + + private static final String dbName = "hcatEximOutputFormatTestDB"; + private static final String tblName = "hcatEximOutputFormatTestTable"; + Configuration conf; + Job job; + List columns; + HCatSchema schema; + FileSystem fs; + Path inputLocation; + Path outputLocation; + private HCatSchema partSchema; + + + @Override + protected void setUp() throws Exception { + System.out.println("Setup started"); + super.setUp(); + conf = new Configuration(); + job = new Job(conf, "test eximinputformat"); + columns = new ArrayList(); + columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_id", + Constants.INT_TYPE_NAME, ""))); + columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_name", + Constants.STRING_TYPE_NAME, ""))); + columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_dob", + Constants.STRING_TYPE_NAME, ""))); + columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_sex", + Constants.STRING_TYPE_NAME, ""))); + schema = new HCatSchema(columns); + + fs = new LocalFileSystem(); + fs.initialize(fs.getWorkingDirectory().toUri(), new Configuration()); + inputLocation = new Path(fs.getWorkingDirectory(), "tmp/exports"); + outputLocation = new Path(fs.getWorkingDirectory(), "tmp/data"); + + job.setJarByClass(this.getClass()); + job.setNumReduceTasks(0); + System.out.println("Setup done"); + } + + private void 
setupMRExport(String[] records) throws IOException { + if (fs.exists(outputLocation)) { + fs.delete(outputLocation, true); + } + FSDataOutputStream ds = fs.create(outputLocation, true); + for (String record : records) { + ds.writeBytes(record); + } + ds.close(); + job.setInputFormatClass(TextInputFormat.class); + job.setOutputFormatClass(HCatEximOutputFormat.class); + TextInputFormat.setInputPaths(job, outputLocation); + job.setMapperClass(TestExport.class); + } + + private void setupMRImport() throws IOException { + if (fs.exists(outputLocation)) { + fs.delete(outputLocation, true); + } + job.setInputFormatClass(HCatEximInputFormat.class); + job.setOutputFormatClass(TextOutputFormat.class); + TextOutputFormat.setOutputPath(job, outputLocation); + job.setMapperClass(TestImport.class); + TestImport.empRecords.clear(); + } + + + @Override + protected void tearDown() throws Exception { + System.out.println("Teardown started"); + super.tearDown(); + // fs.delete(inputLocation, true); + // fs.delete(outputLocation, true); + System.out.println("Teardown done"); + } + + + private void runNonPartExport() throws IOException, InterruptedException, ClassNotFoundException { + if (fs.exists(inputLocation)) { + fs.delete(inputLocation, true); + } + setupMRExport(new String[] { + "237,Krishna,01/01/1990,M,IN,TN\n", + "238,Kalpana,01/01/2000,F,IN,KA\n", + "239,Satya,01/01/2001,M,US,TN\n", + "240,Kavya,01/01/2002,F,US,KA\n" + + }); + HCatEximOutputFormat.setOutput( + job, + dbName, + tblName, + inputLocation.toString(), + null, + null, + schema); + + job.waitForCompletion(true); + HCatEximOutputCommitter committer = new HCatEximOutputCommitter(job,null); + committer.cleanupJob(job); + } + + private void runPartExport(String record, String country, String state) throws IOException, InterruptedException, ClassNotFoundException { + setupMRExport(new String[] {record}); + List partValues = new ArrayList(2); + partValues.add(country); + partValues.add(state); + 
HCatEximOutputFormat.setOutput( + job, + dbName, + tblName, + inputLocation.toString(), + partSchema , + partValues , + schema); + + job.waitForCompletion(true); + HCatEximOutputCommitter committer = new HCatEximOutputCommitter(job,null); + committer.cleanupJob(job); + } + + public void testNonPart() throws Exception { + try { + runNonPartExport(); + setUp(); + setupMRImport(); + HCatEximInputFormat.setInput(job, "tmp/exports", null); + job.waitForCompletion(true); + + assertEquals(4, TestImport.empRecords.size()); + assertEmpDetail(TestImport.empRecords.get(237), "Krishna", "01/01/1990", "M", null, null); + assertEmpDetail(TestImport.empRecords.get(238), "Kalpana", "01/01/2000", "F", null, null); + assertEmpDetail(TestImport.empRecords.get(239), "Satya", "01/01/2001", "M", null, null); + assertEmpDetail(TestImport.empRecords.get(240), "Kavya", "01/01/2002", "F", null, null); + } catch (Exception e) { + System.out.println("Test failed with " + e.getMessage()); + e.printStackTrace(); + throw e; + } + } + + public void testNonPartProjection() throws Exception { + try { + + runNonPartExport(); + setUp(); + setupMRImport(); + HCatEximInputFormat.setInput(job, "tmp/exports", null); + + List readColumns = new ArrayList(); + readColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_id", + Constants.INT_TYPE_NAME, ""))); + readColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_name", + Constants.STRING_TYPE_NAME, ""))); + + HCatEximInputFormat.setOutputSchema(job, new HCatSchema(readColumns)); + job.waitForCompletion(true); + + assertEquals(4, TestImport.empRecords.size()); + assertEmpDetail(TestImport.empRecords.get(237), "Krishna", null, null, null, null); + assertEmpDetail(TestImport.empRecords.get(238), "Kalpana", null, null, null, null); + assertEmpDetail(TestImport.empRecords.get(239), "Satya", null, null, null, null); + assertEmpDetail(TestImport.empRecords.get(240), "Kavya", null, null, null, null); + } catch (Exception e) { + 
System.out.println("Test failed with " + e.getMessage()); + e.printStackTrace(); + throw e; + } + } + + public void testPart() throws Exception { + try { + if (fs.exists(inputLocation)) { + fs.delete(inputLocation, true); + } + + List partKeys = new ArrayList(2); + partKeys.add(new HCatFieldSchema("emp_country", HCatFieldSchema.Type.STRING, "")); + partKeys.add(new HCatFieldSchema("emp_state", HCatFieldSchema.Type.STRING, "")); + partSchema = new HCatSchema(partKeys); + + runPartExport("237,Krishna,01/01/1990,M,IN,TN", "in", "tn"); + setUp(); + runPartExport("238,Kalpana,01/01/2000,F,IN,KA\n", "in", "ka"); + setUp(); + runPartExport("239,Satya,01/01/2001,M,US,TN\n", "us", "tn"); + setUp(); + runPartExport("240,Kavya,01/01/2002,F,US,KA\n", "us", "ka"); + + setUp(); + setupMRImport(); + HCatEximInputFormat.setInput(job, "tmp/exports", null); + job.waitForCompletion(true); + + assertEquals(4, TestImport.empRecords.size()); + assertEmpDetail(TestImport.empRecords.get(237), "Krishna", "01/01/1990", "M", "in", "tn"); + assertEmpDetail(TestImport.empRecords.get(238), "Kalpana", "01/01/2000", "F", "in", "ka"); + assertEmpDetail(TestImport.empRecords.get(239), "Satya", "01/01/2001", "M", "us", "tn"); + assertEmpDetail(TestImport.empRecords.get(240), "Kavya", "01/01/2002", "F", "us", "ka"); + } catch (Exception e) { + System.out.println("Test failed with " + e.getMessage()); + e.printStackTrace(); + throw e; + } + } + + public void testPartWithPartCols() throws Exception { + try { + if (fs.exists(inputLocation)) { + fs.delete(inputLocation, true); + } + + List partKeys = new ArrayList(2); + partKeys.add(new HCatFieldSchema("emp_country", HCatFieldSchema.Type.STRING, "")); + partKeys.add(new HCatFieldSchema("emp_state", HCatFieldSchema.Type.STRING, "")); + partSchema = new HCatSchema(partKeys); + + runPartExport("237,Krishna,01/01/1990,M,IN,TN", "in", "tn"); + setUp(); + runPartExport("238,Kalpana,01/01/2000,F,IN,KA\n", "in", "ka"); + setUp(); + 
runPartExport("239,Satya,01/01/2001,M,US,TN\n", "us", "tn"); + setUp(); + runPartExport("240,Kavya,01/01/2002,F,US,KA\n", "us", "ka"); + + setUp(); + setupMRImport(); + HCatEximInputFormat.setInput(job, "tmp/exports", null); + + List colsPlusPartKeys = new ArrayList(); + colsPlusPartKeys.addAll(columns); + colsPlusPartKeys.addAll(partKeys); + + HCatBaseInputFormat.setOutputSchema(job, new HCatSchema(colsPlusPartKeys)); + job.waitForCompletion(true); + + assertEquals(4, TestImport.empRecords.size()); + assertEmpDetail(TestImport.empRecords.get(237), "Krishna", "01/01/1990", "M", "in", "tn"); + assertEmpDetail(TestImport.empRecords.get(238), "Kalpana", "01/01/2000", "F", "in", "ka"); + assertEmpDetail(TestImport.empRecords.get(239), "Satya", "01/01/2001", "M", "us", "tn"); + assertEmpDetail(TestImport.empRecords.get(240), "Kavya", "01/01/2002", "F", "us", "ka"); + } catch (Exception e) { + System.out.println("Test failed with " + e.getMessage()); + e.printStackTrace(); + throw e; + } + } + + + public void testPartSelection() throws Exception { + try { + if (fs.exists(inputLocation)) { + fs.delete(inputLocation, true); + } + + List partKeys = new ArrayList(2); + partKeys.add(new HCatFieldSchema("emp_country", HCatFieldSchema.Type.STRING, "")); + partKeys.add(new HCatFieldSchema("emp_state", HCatFieldSchema.Type.STRING, "")); + partSchema = new HCatSchema(partKeys); + + runPartExport("237,Krishna,01/01/1990,M,IN,TN", "in", "tn"); + setUp(); + runPartExport("238,Kalpana,01/01/2000,F,IN,KA\n", "in", "ka"); + setUp(); + runPartExport("239,Satya,01/01/2001,M,US,TN\n", "us", "tn"); + setUp(); + runPartExport("240,Kavya,01/01/2002,F,US,KA\n", "us", "ka"); + + setUp(); + setupMRImport(); + Map filter = new TreeMap(); + filter.put("emp_state", "ka"); + HCatEximInputFormat.setInput(job, "tmp/exports", filter); + job.waitForCompletion(true); + + assertEquals(2, TestImport.empRecords.size()); + assertEmpDetail(TestImport.empRecords.get(238), "Kalpana", "01/01/2000", "F", "in", 
"ka"); + assertEmpDetail(TestImport.empRecords.get(240), "Kavya", "01/01/2002", "F", "us", "ka"); + } catch (Exception e) { + System.out.println("Test failed with " + e.getMessage()); + e.printStackTrace(); + throw e; + } + } + + + private void assertEmpDetail(EmpDetails empDetails, String name, String dob, String mf, String country, String state) { + assertNotNull(empDetails); + assertEquals(name, empDetails.emp_name); + assertEquals(dob, empDetails.emp_dob); + assertEquals(mf, empDetails.emp_sex); + assertEquals(country, empDetails.emp_country); + assertEquals(state, empDetails.emp_state); + } + +} diff --git hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatEximOutputFormat.java.broken hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatEximOutputFormat.java.broken new file mode 100644 index 0000000..bf4fb48 --- /dev/null +++ hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatEximOutputFormat.java.broken @@ -0,0 +1,261 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hcatalog.mapreduce; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import junit.framework.TestCase; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocalFileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.Partition; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.ql.parse.EximUtil; +import org.apache.hadoop.hive.serde.Constants; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; +import org.apache.hcatalog.common.HCatConstants; +import org.apache.hcatalog.common.HCatUtil; +import org.apache.hcatalog.data.DefaultHCatRecord; +import org.apache.hcatalog.data.HCatRecord; +import org.apache.hcatalog.data.schema.HCatFieldSchema; +import org.apache.hcatalog.data.schema.HCatSchema; +import org.apache.hcatalog.data.schema.HCatSchemaUtils; + +/** + * + * TestHCatEximOutputFormat. Some basic testing here. 
More testing done via + * TestHCatEximInputFormat + * + */ +public class TestHCatEximOutputFormat extends TestCase { + + public static class TestMap extends + Mapper { + + private HCatSchema recordSchema; + + @Override + protected void setup(Context context) throws IOException, + InterruptedException { + super.setup(context); + recordSchema = HCatEximOutputFormat.getTableSchema(context); + System.out.println("TestMap/setup called"); + } + + @Override + public void map(LongWritable key, Text value, Context context) + throws IOException, InterruptedException { + String[] cols = value.toString().split(","); + HCatRecord record = new DefaultHCatRecord(recordSchema.size()); + System.out.println("TestMap/map called. Cols[0]:" + cols[0]); + System.out.println("TestMap/map called. Cols[1]:" + cols[1]); + System.out.println("TestMap/map called. Cols[2]:" + cols[2]); + System.out.println("TestMap/map called. Cols[3]:" + cols[3]); + record.setInteger("emp_id", recordSchema, Integer.parseInt(cols[0])); + record.setString("emp_name", recordSchema, cols[1]); + record.setString("emp_dob", recordSchema, cols[2]); + record.setString("emp_sex", recordSchema, cols[3]); + context.write(key, record); + } + } + + + private static final String dbName = "hcatEximOutputFormatTestDB"; + private static final String tblName = "hcatEximOutputFormatTestTable"; + Configuration conf; + Job job; + List columns; + HCatSchema schema; + FileSystem fs; + Path outputLocation; + Path dataLocation; + + public void testNonPart() throws Exception { + try { + HCatEximOutputFormat.setOutput( + job, + dbName, + tblName, + outputLocation.toString(), + null, + null, + schema); + + job.waitForCompletion(true); + HCatEximOutputCommitter committer = new HCatEximOutputCommitter(job,null); + committer.cleanupJob(job); + + Path metadataPath = new Path(outputLocation, "_metadata"); + Map.Entry> rv = EximUtil.readMetaData(fs, metadataPath); + Table table = rv.getKey(); + List partitions = rv.getValue(); + + 
assertEquals(dbName, table.getDbName()); + assertEquals(tblName, table.getTableName()); + assertTrue(EximUtil.schemaCompare(table.getSd().getCols(), + HCatUtil.getFieldSchemaList(columns))); + assertEquals("org.apache.hcatalog.rcfile.RCFileInputDriver", + table.getParameters().get(HCatConstants.HCAT_ISD_CLASS)); + assertEquals("org.apache.hcatalog.rcfile.RCFileOutputDriver", + table.getParameters().get(HCatConstants.HCAT_OSD_CLASS)); + assertEquals("org.apache.hadoop.hive.ql.io.RCFileInputFormat", + table.getSd().getInputFormat()); + assertEquals("org.apache.hadoop.hive.ql.io.RCFileOutputFormat", + table.getSd().getOutputFormat()); + assertEquals("org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe", + table.getSd().getSerdeInfo().getSerializationLib()); + assertEquals(0, table.getPartitionKeys().size()); + + assertEquals(0, partitions.size()); + } catch (Exception e) { + System.out.println("Test failed with " + e.getMessage()); + e.printStackTrace(); + throw e; + } + + } + + public void testPart() throws Exception { + try { + List partKeys = new ArrayList(); + partKeys.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_country", + Constants.STRING_TYPE_NAME, ""))); + partKeys.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_state", + Constants.STRING_TYPE_NAME, ""))); + HCatSchema partitionSchema = new HCatSchema(partKeys); + + List partitionVals = new ArrayList(); + partitionVals.add("IN"); + partitionVals.add("TN"); + + HCatEximOutputFormat.setOutput( + job, + dbName, + tblName, + outputLocation.toString(), + partitionSchema, + partitionVals, + schema); + + job.waitForCompletion(true); + HCatEximOutputCommitter committer = new HCatEximOutputCommitter(job,null); + committer.cleanupJob(job); + Path metadataPath = new Path(outputLocation, "_metadata"); + Map.Entry> rv = EximUtil.readMetaData(fs, metadataPath); + Table table = rv.getKey(); + List partitions = rv.getValue(); + + assertEquals(dbName, table.getDbName()); + assertEquals(tblName, 
table.getTableName()); + assertTrue(EximUtil.schemaCompare(table.getSd().getCols(), + HCatUtil.getFieldSchemaList(columns))); + assertEquals("org.apache.hcatalog.rcfile.RCFileInputDriver", + table.getParameters().get(HCatConstants.HCAT_ISD_CLASS)); + assertEquals("org.apache.hcatalog.rcfile.RCFileOutputDriver", + table.getParameters().get(HCatConstants.HCAT_OSD_CLASS)); + assertEquals("org.apache.hadoop.hive.ql.io.RCFileInputFormat", + table.getSd().getInputFormat()); + assertEquals("org.apache.hadoop.hive.ql.io.RCFileOutputFormat", + table.getSd().getOutputFormat()); + assertEquals("org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe", + table.getSd().getSerdeInfo().getSerializationLib()); + assertEquals(2, table.getPartitionKeys().size()); + List partSchema = table.getPartitionKeys(); + assertEquals("emp_country", partSchema.get(0).getName()); + assertEquals("emp_state", partSchema.get(1).getName()); + + assertEquals(1, partitions.size()); + Partition partition = partitions.get(0); + assertEquals("IN", partition.getValues().get(0)); + assertEquals("TN", partition.getValues().get(1)); + assertEquals("org.apache.hcatalog.rcfile.RCFileInputDriver", + partition.getParameters().get(HCatConstants.HCAT_ISD_CLASS)); + assertEquals("org.apache.hcatalog.rcfile.RCFileOutputDriver", + partition.getParameters().get(HCatConstants.HCAT_OSD_CLASS)); + } catch (Exception e) { + System.out.println("Test failed with " + e.getMessage()); + e.printStackTrace(); + throw e; + } + } + + @Override + protected void setUp() throws Exception { + System.out.println("Setup started"); + super.setUp(); + conf = new Configuration(); + job = new Job(conf, "test eximoutputformat"); + columns = new ArrayList(); + columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_id", + Constants.INT_TYPE_NAME, ""))); + columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_name", + Constants.STRING_TYPE_NAME, ""))); + columns.add(HCatSchemaUtils.getHCatFieldSchema(new 
FieldSchema("emp_dob", + Constants.STRING_TYPE_NAME, ""))); + columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_sex", + Constants.STRING_TYPE_NAME, ""))); + schema = new HCatSchema(columns); + + fs = new LocalFileSystem(); + fs.initialize(fs.getWorkingDirectory().toUri(), new Configuration()); + outputLocation = new Path(fs.getWorkingDirectory(), "tmp/exports"); + if (fs.exists(outputLocation)) { + fs.delete(outputLocation, true); + } + dataLocation = new Path(fs.getWorkingDirectory(), "tmp/data"); + if (fs.exists(dataLocation)) { + fs.delete(dataLocation, true); + } + FSDataOutputStream ds = fs.create(dataLocation, true); + ds.writeBytes("237,Krishna,01/01/1990,M,IN,TN\n"); + ds.writeBytes("238,Kalpana,01/01/2000,F,IN,KA\n"); + ds.writeBytes("239,Satya,01/01/2001,M,US,TN\n"); + ds.writeBytes("240,Kavya,01/01/2002,F,US,KA\n"); + ds.close(); + + job.setInputFormatClass(TextInputFormat.class); + job.setOutputFormatClass(HCatEximOutputFormat.class); + TextInputFormat.setInputPaths(job, dataLocation); + job.setJarByClass(this.getClass()); + job.setMapperClass(TestMap.class); + job.setNumReduceTasks(0); + System.out.println("Setup done"); + } + + @Override + protected void tearDown() throws Exception { + System.out.println("Teardown started"); + super.tearDown(); + fs.delete(dataLocation, true); + fs.delete(outputLocation, true); + System.out.println("Teardown done"); + } +} diff --git hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatExternalDynamicPartitioned.java hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatExternalDynamicPartitioned.java new file mode 100644 index 0000000..673f2aa --- /dev/null +++ hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatExternalDynamicPartitioned.java @@ -0,0 +1,29 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.mapreduce; + +public class TestHCatExternalDynamicPartitioned extends TestHCatDynamicPartitioned { + + @Override + protected Boolean isTableExternal() { + return true; + } + +} diff --git hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatExternalHCatNonPartitioned.java hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatExternalHCatNonPartitioned.java new file mode 100644 index 0000000..4dc2b94 --- /dev/null +++ hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatExternalHCatNonPartitioned.java @@ -0,0 +1,29 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.mapreduce; + +public class TestHCatExternalHCatNonPartitioned extends TestHCatNonPartitioned { + + @Override + protected Boolean isTableExternal() { + return true; + } + +} diff --git hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatExternalPartitioned.java hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatExternalPartitioned.java new file mode 100644 index 0000000..474e825 --- /dev/null +++ hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatExternalPartitioned.java @@ -0,0 +1,29 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hive.hcatalog.mapreduce; + +public class TestHCatExternalPartitioned extends TestHCatPartitioned { + + @Override + protected Boolean isTableExternal() { + return true; + } + +} diff --git hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatHiveCompatibility.java hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatHiveCompatibility.java new file mode 100644 index 0000000..4cf3b08 --- /dev/null +++ hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatHiveCompatibility.java @@ -0,0 +1,129 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hive.hcatalog.mapreduce; + +import java.io.File; +import java.io.FileWriter; +import java.util.Arrays; +import java.util.Iterator; + +import junit.framework.Assert; +import org.apache.hadoop.hive.metastore.api.Partition; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hive.hcatalog.common.HCatConstants; +import org.apache.pig.ExecType; +import org.apache.pig.PigServer; +import org.apache.pig.data.Tuple; +import org.junit.BeforeClass; +import org.junit.Test; + +public class TestHCatHiveCompatibility extends HCatBaseTest { + private static final String INPUT_FILE_NAME = TEST_DATA_DIR + "/input.data"; + + @BeforeClass + public static void createInputData() throws Exception { + int LOOP_SIZE = 11; + File file = new File(INPUT_FILE_NAME); + file.deleteOnExit(); + FileWriter writer = new FileWriter(file); + for (int i = 0; i < LOOP_SIZE; i++) { + writer.write(i + "\t1\n"); + } + writer.close(); + } + + @Test + public void testUnpartedReadWrite() throws Exception { + + driver.run("drop table if exists junit_unparted_noisd"); + String createTable = "create table junit_unparted_noisd(a int) stored as RCFILE"; + Assert.assertEquals(0, driver.run(createTable).getResponseCode()); + + // assert that the table created has no hcat instrumentation, and that we're still able to read it. 
+ Table table = client.getTable("default", "junit_unparted_noisd"); + Assert.assertTrue(table.getSd().getInputFormat().equals(HCatConstants.HIVE_RCFILE_IF_CLASS)); + + PigServer server = new PigServer(ExecType.LOCAL); + logAndRegister(server, "A = load '" + INPUT_FILE_NAME + "' as (a:int);"); + logAndRegister(server, "store A into 'default.junit_unparted_noisd' using org.apache.hive.hcatalog.pig.HCatStorer();"); + logAndRegister(server, "B = load 'default.junit_unparted_noisd' using org.apache.hive.hcatalog.pig.HCatLoader();"); + Iterator itr = server.openIterator("B"); + + int i = 0; + + while (itr.hasNext()) { + Tuple t = itr.next(); + Assert.assertEquals(1, t.size()); + Assert.assertEquals(t.get(0), i); + i++; + } + + Assert.assertFalse(itr.hasNext()); + Assert.assertEquals(11, i); + + // assert that the table created still has no hcat instrumentation + Table table2 = client.getTable("default", "junit_unparted_noisd"); + Assert.assertTrue(table2.getSd().getInputFormat().equals(HCatConstants.HIVE_RCFILE_IF_CLASS)); + + driver.run("drop table junit_unparted_noisd"); + } + + @Test + public void testPartedRead() throws Exception { + + driver.run("drop table if exists junit_parted_noisd"); + String createTable = "create table junit_parted_noisd(a int) partitioned by (b string) stored as RCFILE"; + Assert.assertEquals(0, driver.run(createTable).getResponseCode()); + + // assert that the table created has no hcat instrumentation, and that we're still able to read it. 
+ Table table = client.getTable("default", "junit_parted_noisd"); + Assert.assertTrue(table.getSd().getInputFormat().equals(HCatConstants.HIVE_RCFILE_IF_CLASS)); + + PigServer server = new PigServer(ExecType.LOCAL); + logAndRegister(server, "A = load '" + INPUT_FILE_NAME + "' as (a:int);"); + logAndRegister(server, "store A into 'default.junit_parted_noisd' using org.apache.hive.hcatalog.pig.HCatStorer('b=42');"); + logAndRegister(server, "B = load 'default.junit_parted_noisd' using org.apache.hive.hcatalog.pig.HCatLoader();"); + Iterator itr = server.openIterator("B"); + + int i = 0; + + while (itr.hasNext()) { + Tuple t = itr.next(); + Assert.assertEquals(2, t.size()); // Contains explicit field "a" and partition "b". + Assert.assertEquals(t.get(0), i); + Assert.assertEquals(t.get(1), "42"); + i++; + } + + Assert.assertFalse(itr.hasNext()); + Assert.assertEquals(11, i); + + // assert that the table created still has no hcat instrumentation + Table table2 = client.getTable("default", "junit_parted_noisd"); + Assert.assertTrue(table2.getSd().getInputFormat().equals(HCatConstants.HIVE_RCFILE_IF_CLASS)); + + // assert that there is one partition present, and it had hcat instrumentation inserted when it was created. 
+ Partition ptn = client.getPartition("default", "junit_parted_noisd", Arrays.asList("42")); + + Assert.assertNotNull(ptn); + Assert.assertTrue(ptn.getSd().getInputFormat().equals(HCatConstants.HIVE_RCFILE_IF_CLASS)); + driver.run("drop table junit_parted_noisd"); + } +} diff --git hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatHiveThriftCompatibility.java hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatHiveThriftCompatibility.java new file mode 100644 index 0000000..4dd0506 --- /dev/null +++ hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatHiveThriftCompatibility.java @@ -0,0 +1,116 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hive.hcatalog.mapreduce; + +import junit.framework.Assert; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.serde2.thrift.test.IntString; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.io.SequenceFile; +import org.apache.pig.ExecType; +import org.apache.pig.PigServer; +import org.apache.pig.data.DataType; +import org.apache.pig.data.Tuple; +import org.apache.pig.impl.logicalLayer.schema.Schema; +import org.apache.thrift.protocol.TBinaryProtocol; +import org.apache.thrift.transport.TIOStreamTransport; +import org.junit.Before; +import org.junit.Test; + +import java.io.ByteArrayOutputStream; +import java.util.Iterator; + +public class TestHCatHiveThriftCompatibility extends HCatBaseTest { + + private boolean setUpComplete = false; + private Path intStringSeq; + + @Before + @Override + public void setUp() throws Exception { + super.setUp(); + if (setUpComplete) { + return; + } + + ByteArrayOutputStream out = new ByteArrayOutputStream(); + TIOStreamTransport transport = new TIOStreamTransport(out); + TBinaryProtocol protocol = new TBinaryProtocol(transport); + + IntString intString = new IntString(1, "one", 1); + intString.write(protocol); + BytesWritable bytesWritable = new BytesWritable(out.toByteArray()); + + intStringSeq = new Path(TEST_DATA_DIR + "/data/intString.seq"); + LOG.info("Creating data file: " + intStringSeq); + + SequenceFile.Writer seqFileWriter = SequenceFile.createWriter( + intStringSeq.getFileSystem(hiveConf), hiveConf, intStringSeq, + NullWritable.class, BytesWritable.class); + seqFileWriter.append(NullWritable.get(), bytesWritable); + seqFileWriter.close(); + + setUpComplete = true; + } + + /** + * Create a table with no explicit schema and ensure its correctly + * discovered from the thrift struct. 
+ */ + @Test + public void testDynamicCols() throws Exception { + Assert.assertEquals(0, driver.run("drop table if exists test_thrift").getResponseCode()); + Assert.assertEquals(0, driver.run( + "create external table test_thrift " + + "partitioned by (year string) " + + "row format serde 'org.apache.hadoop.hive.serde2.thrift.ThriftDeserializer' " + + "with serdeproperties ( " + + " 'serialization.class'='org.apache.hadoop.hive.serde2.thrift.test.IntString', " + + " 'serialization.format'='org.apache.thrift.protocol.TBinaryProtocol') " + + "stored as" + + " inputformat 'org.apache.hadoop.mapred.SequenceFileInputFormat'" + + " outputformat 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'") + .getResponseCode()); + Assert.assertEquals(0, + driver.run("alter table test_thrift add partition (year = '2012') location '" + + intStringSeq.getParent() + "'").getResponseCode()); + + PigServer pigServer = new PigServer(ExecType.LOCAL); + pigServer.registerQuery("A = load 'test_thrift' using org.apache.hive.hcatalog.pig.HCatLoader();"); + + Schema expectedSchema = new Schema(); + expectedSchema.add(new Schema.FieldSchema("myint", DataType.INTEGER)); + expectedSchema.add(new Schema.FieldSchema("mystring", DataType.CHARARRAY)); + expectedSchema.add(new Schema.FieldSchema("underscore_int", DataType.INTEGER)); + expectedSchema.add(new Schema.FieldSchema("year", DataType.CHARARRAY)); + + Assert.assertEquals(expectedSchema, pigServer.dumpSchema("A")); + + Iterator iterator = pigServer.openIterator("A"); + Tuple t = iterator.next(); + Assert.assertEquals(1, t.get(0)); + Assert.assertEquals("one", t.get(1)); + Assert.assertEquals(1, t.get(2)); + Assert.assertEquals("2012", t.get(3)); + + Assert.assertFalse(iterator.hasNext()); + } +} diff --git hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatInputFormat.java hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatInputFormat.java new file mode 100644 index 0000000..765ea14 --- 
/dev/null +++ hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatInputFormat.java @@ -0,0 +1,148 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.mapreduce; + +import junit.framework.Assert; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.serde2.thrift.test.IntString; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.io.SequenceFile; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; +import org.apache.hive.hcatalog.common.HCatConstants; +import org.apache.hive.hcatalog.data.HCatRecord; +import org.apache.thrift.protocol.TBinaryProtocol; +import org.apache.thrift.transport.TIOStreamTransport; +import org.junit.Before; +import org.junit.Test; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; + +public class TestHCatInputFormat extends HCatBaseTest { + + private boolean setUpComplete = false; + + /** + * Create an input sequence file with 100 records; every 10th record is bad. 
+ * Load this table into Hive. + */ + @Before + @Override + public void setUp() throws Exception { + super.setUp(); + if (setUpComplete) { + return; + } + + Path intStringSeq = new Path(TEST_DATA_DIR + "/data/intString.seq"); + LOG.info("Creating data file: " + intStringSeq); + SequenceFile.Writer seqFileWriter = SequenceFile.createWriter( + intStringSeq.getFileSystem(hiveConf), hiveConf, intStringSeq, + NullWritable.class, BytesWritable.class); + + ByteArrayOutputStream out = new ByteArrayOutputStream(); + TIOStreamTransport transport = new TIOStreamTransport(out); + TBinaryProtocol protocol = new TBinaryProtocol(transport); + + for (int i = 1; i <= 100; i++) { + if (i % 10 == 0) { + seqFileWriter.append(NullWritable.get(), new BytesWritable("bad record".getBytes())); + } else { + out.reset(); + IntString intString = new IntString(i, Integer.toString(i), i); + intString.write(protocol); + BytesWritable bytesWritable = new BytesWritable(out.toByteArray()); + seqFileWriter.append(NullWritable.get(), bytesWritable); + } + } + + seqFileWriter.close(); + + // Now let's load this file into a new Hive table. 
+ Assert.assertEquals(0, driver.run("drop table if exists test_bad_records").getResponseCode()); + Assert.assertEquals(0, driver.run( + "create table test_bad_records " + + "row format serde 'org.apache.hadoop.hive.serde2.thrift.ThriftDeserializer' " + + "with serdeproperties ( " + + " 'serialization.class'='org.apache.hadoop.hive.serde2.thrift.test.IntString', " + + " 'serialization.format'='org.apache.thrift.protocol.TBinaryProtocol') " + + "stored as" + + " inputformat 'org.apache.hadoop.mapred.SequenceFileInputFormat'" + + " outputformat 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'") + .getResponseCode()); + Assert.assertEquals(0, driver.run("load data local inpath '" + intStringSeq.getParent() + + "' into table test_bad_records").getResponseCode()); + + setUpComplete = true; + } + + @Test + public void testBadRecordHandlingPasses() throws Exception { + Assert.assertTrue(runJob(0.1f)); + } + + @Test + public void testBadRecordHandlingFails() throws Exception { + Assert.assertFalse(runJob(0.01f)); + } + + private boolean runJob(float badRecordThreshold) throws Exception { + Configuration conf = new Configuration(); + + conf.setFloat(HCatConstants.HCAT_INPUT_BAD_RECORD_THRESHOLD_KEY, badRecordThreshold); + + Job job = new Job(conf); + job.setJarByClass(this.getClass()); + job.setMapperClass(MyMapper.class); + + job.setInputFormatClass(HCatInputFormat.class); + job.setOutputFormatClass(TextOutputFormat.class); + + HCatInputFormat.setInput(job, "default", "test_bad_records"); + + job.setMapOutputKeyClass(HCatRecord.class); + job.setMapOutputValueClass(HCatRecord.class); + + job.setNumReduceTasks(0); + + Path path = new Path(TEST_DATA_DIR, "test_bad_record_handling_output"); + if (path.getFileSystem(conf).exists(path)) { + path.getFileSystem(conf).delete(path, true); + } + + TextOutputFormat.setOutputPath(job, path); + + return job.waitForCompletion(true); + } + + public static class MyMapper extends Mapper { + @Override + public void 
map(NullWritable key, HCatRecord value, Context context) + throws IOException, InterruptedException { + LOG.info("HCatRecord: " + value); + context.write(NullWritable.get(), new Text(value.toString())); + } + } +} diff --git hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatMultiOutputFormat.java hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatMultiOutputFormat.java new file mode 100644 index 0000000..2b435a7 --- /dev/null +++ hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatMultiOutputFormat.java @@ -0,0 +1,430 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hive.hcatalog.mapreduce; + +import java.io.File; +import java.io.IOException; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Random; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FileUtil; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.HiveMetaStore; +import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; +import org.apache.hadoop.hive.metastore.api.SerDeInfo; +import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.ql.exec.FetchTask; +import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.metadata.Hive; +import org.apache.hadoop.hive.ql.metadata.Partition; +import org.apache.hadoop.hive.ql.plan.FetchWork; +import org.apache.hadoop.hive.ql.plan.PartitionDesc; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.MiniMRCluster; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; +import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; +import org.apache.hive.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzer; +import org.apache.hive.hcatalog.common.HCatException; +import org.apache.hive.hcatalog.data.DefaultHCatRecord; +import org.apache.hive.hcatalog.data.HCatRecord; +import 
org.apache.hive.hcatalog.data.schema.HCatFieldSchema; +import org.apache.hive.hcatalog.data.schema.HCatSchema; +import org.apache.hive.hcatalog.data.schema.HCatSchemaUtils; +import org.apache.hive.hcatalog.mapreduce.MultiOutputFormat.JobConfigurer; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class TestHCatMultiOutputFormat { + + private static final Logger LOG = LoggerFactory.getLogger(TestHCatMultiOutputFormat.class); + + private static final String DATABASE = "default"; + private static final String[] tableNames = {"test1", "test2", "test3"}; + private static final String[] tablePerms = {"755", "750", "700"}; + private static Path warehousedir = null; + private static HashMap schemaMap = new HashMap(); + private static HiveMetaStoreClient hmsc; + private static MiniMRCluster mrCluster; + private static Configuration mrConf; + private static HiveConf hiveConf; + private static File workDir; + + private static final String msPort = "20199"; + private static Thread t; + + static { + schemaMap.put(tableNames[0], new HCatSchema(ColumnHolder.hCattest1Cols)); + schemaMap.put(tableNames[1], new HCatSchema(ColumnHolder.hCattest2Cols)); + schemaMap.put(tableNames[2], new HCatSchema(ColumnHolder.hCattest3Cols)); + } + + private static class RunMS implements Runnable { + + @Override + public void run() { + try { + String warehouseConf = HiveConf.ConfVars.METASTOREWAREHOUSE.varname + "=" + + warehousedir.toString(); + HiveMetaStore.main(new String[]{"-v", "-p", msPort, "--hiveconf", warehouseConf}); + } catch (Throwable t) { + System.err.println("Exiting. 
Got exception from metastore: " + t.getMessage()); + } + } + + } + + /** + * Private class which holds all the data for the test cases + */ + private static class ColumnHolder { + + private static ArrayList hCattest1Cols = new ArrayList(); + private static ArrayList hCattest2Cols = new ArrayList(); + private static ArrayList hCattest3Cols = new ArrayList(); + + private static ArrayList partitionCols = new ArrayList(); + private static ArrayList test1Cols = new ArrayList(); + private static ArrayList test2Cols = new ArrayList(); + private static ArrayList test3Cols = new ArrayList(); + + private static HashMap> colMapping = new HashMap>(); + + static { + try { + FieldSchema keyCol = new FieldSchema("key", serdeConstants.STRING_TYPE_NAME, ""); + test1Cols.add(keyCol); + test2Cols.add(keyCol); + test3Cols.add(keyCol); + hCattest1Cols.add(HCatSchemaUtils.getHCatFieldSchema(keyCol)); + hCattest2Cols.add(HCatSchemaUtils.getHCatFieldSchema(keyCol)); + hCattest3Cols.add(HCatSchemaUtils.getHCatFieldSchema(keyCol)); + FieldSchema valueCol = new FieldSchema("value", serdeConstants.STRING_TYPE_NAME, ""); + test1Cols.add(valueCol); + test3Cols.add(valueCol); + hCattest1Cols.add(HCatSchemaUtils.getHCatFieldSchema(valueCol)); + hCattest3Cols.add(HCatSchemaUtils.getHCatFieldSchema(valueCol)); + FieldSchema extraCol = new FieldSchema("extra", serdeConstants.STRING_TYPE_NAME, ""); + test3Cols.add(extraCol); + hCattest3Cols.add(HCatSchemaUtils.getHCatFieldSchema(extraCol)); + colMapping.put("test1", test1Cols); + colMapping.put("test2", test2Cols); + colMapping.put("test3", test3Cols); + } catch (HCatException e) { + LOG.error("Error in setting up schema fields for the table", e); + throw new RuntimeException(e); + } + } + + static { + partitionCols.add(new FieldSchema("ds", serdeConstants.STRING_TYPE_NAME, "")); + partitionCols.add(new FieldSchema("cluster", serdeConstants.STRING_TYPE_NAME, "")); + } + } + + @BeforeClass + public static void setup() throws Exception { + String 
testDir = System.getProperty("test.data.dir", "./"); + testDir = testDir + "/test_multitable_" + Math.abs(new Random().nextLong()) + "/"; + workDir = new File(new File(testDir).getCanonicalPath()); + FileUtil.fullyDelete(workDir); + workDir.mkdirs(); + + warehousedir = new Path(workDir + "/warehouse"); + + // Run hive metastore server + t = new Thread(new RunMS()); + t.start(); + + // LocalJobRunner does not work with mapreduce OutputCommitter. So need + // to use MiniMRCluster. MAPREDUCE-2350 + Configuration conf = new Configuration(true); + conf.set("yarn.scheduler.capacity.root.queues", "default"); + conf.set("yarn.scheduler.capacity.root.default.capacity", "100"); + + FileSystem fs = FileSystem.get(conf); + System.setProperty("hadoop.log.dir", new File(workDir, "/logs").getAbsolutePath()); + mrCluster = new MiniMRCluster(1, fs.getUri().toString(), 1, null, null, + new JobConf(conf)); + mrConf = mrCluster.createJobConf(); + fs.mkdirs(warehousedir); + + initializeSetup(); + } + + private static void initializeSetup() throws Exception { + + hiveConf = new HiveConf(mrConf, TestHCatMultiOutputFormat.class); + hiveConf.set("hive.metastore.local", "false"); + hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, "thrift://localhost:" + msPort); + hiveConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES, 3); + hiveConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTFAILURERETRIES, 3); + hiveConf.set(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname, + HCatSemanticAnalyzer.class.getName()); + hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); + System.setProperty(HiveConf.ConfVars.PREEXECHOOKS.varname, " "); + System.setProperty(HiveConf.ConfVars.POSTEXECHOOKS.varname, " "); + + hiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, warehousedir.toString()); + try { + hmsc = new HiveMetaStoreClient(hiveConf, null); + 
initalizeTables(); + } catch (Throwable e) { + LOG.error("Exception encountered while setting up testcase", e); + throw new Exception(e); + } finally { + if (hmsc != null) { hmsc.close(); } // client construction may have failed; don't mask that error with an NPE + } + } + + private static void initalizeTables() throws Exception { + for (String table : tableNames) { + try { + if (hmsc.getTable(DATABASE, table) != null) { + hmsc.dropTable(DATABASE, table); + } + } catch (NoSuchObjectException ignored) { + } + } + for (int i = 0; i < tableNames.length; i++) { + createTable(tableNames[i], tablePerms[i]); + } + } + + private static void createTable(String tableName, String tablePerm) throws Exception { + Table tbl = new Table(); + tbl.setDbName(DATABASE); + tbl.setTableName(tableName); + StorageDescriptor sd = new StorageDescriptor(); + sd.setCols(ColumnHolder.colMapping.get(tableName)); + tbl.setSd(sd); + sd.setParameters(new HashMap()); + sd.setSerdeInfo(new SerDeInfo()); + sd.getSerdeInfo().setName(tbl.getTableName()); + sd.getSerdeInfo().setParameters(new HashMap()); + sd.setInputFormat(org.apache.hadoop.hive.ql.io.RCFileInputFormat.class.getName()); + sd.setOutputFormat(org.apache.hadoop.hive.ql.io.RCFileOutputFormat.class.getName()); + sd.getSerdeInfo().getParameters().put(serdeConstants.SERIALIZATION_FORMAT, "1"); + sd.getSerdeInfo().setSerializationLib( + org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe.class.getName()); + tbl.setPartitionKeys(ColumnHolder.partitionCols); + + hmsc.createTable(tbl); + FileSystem fs = FileSystem.get(mrConf); + fs.setPermission(new Path(warehousedir, tableName), new FsPermission(tablePerm)); + } + + @AfterClass + public static void tearDown() throws IOException { + FileUtil.fullyDelete(workDir); + FileSystem fs = FileSystem.get(mrConf); + if (fs.exists(warehousedir)) { + fs.delete(warehousedir, true); + } + if (mrCluster != null) { + mrCluster.shutdown(); + } + } + + /** + * Simple test case. + *
<ol> + * <li>Submits a mapred job which writes out one fixed line to each of the tables</li> + * <li>uses hive fetch task to read the data and see if it matches what was written</li> + * </ol>
+ * + * @throws Exception if any error occurs + */ + @Test + public void testOutputFormat() throws Throwable { + HashMap partitionValues = new HashMap(); + partitionValues.put("ds", "1"); + partitionValues.put("cluster", "ag"); + ArrayList infoList = new ArrayList(); + infoList.add(OutputJobInfo.create("default", tableNames[0], partitionValues)); + infoList.add(OutputJobInfo.create("default", tableNames[1], partitionValues)); + infoList.add(OutputJobInfo.create("default", tableNames[2], partitionValues)); + + Job job = new Job(hiveConf, "SampleJob"); + + job.setMapperClass(MyMapper.class); + job.setInputFormatClass(TextInputFormat.class); + job.setOutputFormatClass(MultiOutputFormat.class); + job.setNumReduceTasks(0); + + JobConfigurer configurer = MultiOutputFormat.createConfigurer(job); + + for (int i = 0; i < tableNames.length; i++) { + configurer.addOutputFormat(tableNames[i], HCatOutputFormat.class, BytesWritable.class, + HCatRecord.class); + HCatOutputFormat.setOutput(configurer.getJob(tableNames[i]), infoList.get(i)); + HCatOutputFormat.setSchema(configurer.getJob(tableNames[i]), + schemaMap.get(tableNames[i])); + } + configurer.configure(); + + Path filePath = createInputFile(); + FileInputFormat.addInputPath(job, filePath); + Assert.assertTrue(job.waitForCompletion(true)); + + ArrayList outputs = new ArrayList(); + for (String tbl : tableNames) { + outputs.add(getTableData(tbl, "default").get(0)); + } + Assert.assertEquals("Comparing output of table " + + tableNames[0] + " is not correct", outputs.get(0), "a,a,1,ag"); + Assert.assertEquals("Comparing output of table " + + tableNames[1] + " is not correct", outputs.get(1), "a,1,ag"); + Assert.assertEquals("Comparing output of table " + + tableNames[2] + " is not correct", outputs.get(2), "a,a,extra,1,ag"); + + // Check permisssion on partition dirs and files created + for (int i = 0; i < tableNames.length; i++) { + Path partitionFile = new Path(warehousedir + "/" + tableNames[i] + + 
"/ds=1/cluster=ag/part-m-00000"); + FileSystem fs = partitionFile.getFileSystem(mrConf); + Assert.assertEquals("File permissions of table " + tableNames[i] + " is not correct", + fs.getFileStatus(partitionFile).getPermission(), + new FsPermission(tablePerms[i])); + Assert.assertEquals("File permissions of table " + tableNames[i] + " is not correct", + fs.getFileStatus(partitionFile.getParent()).getPermission(), + new FsPermission(tablePerms[i])); + Assert.assertEquals("File permissions of table " + tableNames[i] + " is not correct", + fs.getFileStatus(partitionFile.getParent().getParent()).getPermission(), + new FsPermission(tablePerms[i])); + + } + LOG.info("File permissions verified"); + } + + /** + * Create a input file for map + * + * @return absolute path of the file. + * @throws IOException if any error encountered + */ + private Path createInputFile() throws IOException { + Path f = new Path(workDir + "/MultiTableInput.txt"); + FileSystem fs = FileSystem.get(mrConf); + if (fs.exists(f)) { + fs.delete(f, true); + } + OutputStream out = fs.create(f); + for (int i = 0; i < 3; i++) { + out.write("a,a\n".getBytes()); + } + out.close(); + return f; + } + + /** + * Method to fetch table data + * + * @param table table name + * @param database database + * @return list of columns in comma seperated way + * @throws Exception if any error occurs + */ + private List getTableData(String table, String database) throws Exception { + HiveConf conf = new HiveConf(); + conf.addResource("hive-site.xml"); + ArrayList results = new ArrayList(); + ArrayList temp = new ArrayList(); + Hive hive = Hive.get(conf); + org.apache.hadoop.hive.ql.metadata.Table tbl = hive.getTable(database, table); + FetchWork work; + if (!tbl.getPartCols().isEmpty()) { + List partitions = hive.getPartitions(tbl); + List partDesc = new ArrayList(); + List partLocs = new ArrayList(); + for (Partition part : partitions) { + partLocs.add(part.getLocation()); + partDesc.add(Utilities.getPartitionDesc(part)); 
+ } + work = new FetchWork(partLocs, partDesc, Utilities.getTableDesc(tbl)); + work.setLimit(100); + } else { + work = new FetchWork(tbl.getDataLocation().toString(), Utilities.getTableDesc(tbl)); + } + FetchTask task = new FetchTask(); + task.setWork(work); + task.initialize(conf, null, null); + task.fetch(temp); + for (String str : temp) { + results.add(str.replace("\t", ",")); + } + return results; + } + + private static class MyMapper extends + Mapper { + + private int i = 0; + + @Override + protected void map(LongWritable key, Text value, Context context) + throws IOException, InterruptedException { + HCatRecord record = null; + String[] splits = value.toString().split(","); + switch (i) { + case 0: + record = new DefaultHCatRecord(2); + record.set(0, splits[0]); + record.set(1, splits[1]); + break; + case 1: + record = new DefaultHCatRecord(1); + record.set(0, splits[0]); + break; + case 2: + record = new DefaultHCatRecord(3); + record.set(0, splits[0]); + record.set(1, splits[1]); + record.set(2, "extra"); + break; + default: + Assert.fail("This should not happen!!!!!"); + } + MultiOutputFormat.write(tableNames[i], null, record, context); + i++; + } + } +} diff --git hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatNonPartitioned.java hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatNonPartitioned.java new file mode 100644 index 0000000..2368417 --- /dev/null +++ hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatNonPartitioned.java @@ -0,0 +1,137 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.mapreduce; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hive.hcatalog.common.ErrorType; +import org.apache.hive.hcatalog.common.HCatException; +import org.apache.hive.hcatalog.data.DefaultHCatRecord; +import org.apache.hive.hcatalog.data.HCatRecord; +import org.apache.hive.hcatalog.data.schema.HCatFieldSchema; +import org.apache.hive.hcatalog.data.schema.HCatSchemaUtils; +import org.junit.BeforeClass; +import org.junit.Test; + +import static junit.framework.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +public class TestHCatNonPartitioned extends HCatMapReduceTest { + + private static List writeRecords; + static List partitionColumns; + + @BeforeClass + public static void oneTimeSetUp() throws Exception { + + dbName = null; //test if null dbName works ("default" is used) + tableName = "testHCatNonPartitionedTable"; + + writeRecords = new ArrayList(); + + for (int i = 0; i < 20; i++) { + List objList = new ArrayList(); + + objList.add(i); + objList.add("strvalue" + i); + writeRecords.add(new DefaultHCatRecord(objList)); + } + + partitionColumns = new ArrayList(); + partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, ""))); + partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", 
serdeConstants.STRING_TYPE_NAME, ""))); + } + + @Override + protected List getPartitionKeys() { + List fields = new ArrayList(); + //empty list, non partitioned + return fields; + } + + @Override + protected List getTableColumns() { + List fields = new ArrayList(); + fields.add(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")); + fields.add(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "")); + return fields; + } + + + @Test + public void testHCatNonPartitionedTable() throws Exception { + + Map partitionMap = new HashMap(); + runMRCreate(null, partitionColumns, writeRecords, 10, true); + + //Test for duplicate publish + IOException exc = null; + try { + runMRCreate(null, partitionColumns, writeRecords, 20, true); + } catch (IOException e) { + exc = e; + } + + assertTrue(exc != null); + assertTrue(exc instanceof HCatException); + assertEquals(ErrorType.ERROR_NON_EMPTY_TABLE, ((HCatException) exc).getErrorType()); + + //Test for publish with invalid partition key name + exc = null; + partitionMap.clear(); + partitionMap.put("px", "p1value2"); + + try { + runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true); + } catch (IOException e) { + exc = e; + } + + assertTrue(exc != null); + assertTrue(exc instanceof HCatException); + assertEquals(ErrorType.ERROR_INVALID_PARTITION_VALUES, ((HCatException) exc).getErrorType()); + + //Read should get 10 rows + runMRRead(10); + + hiveReadTest(); + } + + //Test that data inserted through hcatoutputformat is readable from hive + private void hiveReadTest() throws Exception { + + String query = "select * from " + tableName; + int retCode = driver.run(query).getResponseCode(); + + if (retCode != 0) { + throw new Exception("Error " + retCode + " running query " + query); + } + + ArrayList res = new ArrayList(); + driver.getResults(res); + assertEquals(10, res.size()); + } +} diff --git hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatOutputFormat.java 
hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatOutputFormat.java new file mode 100644 index 0000000..4c71869 --- /dev/null +++ hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatOutputFormat.java @@ -0,0 +1,167 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hive.hcatalog.mapreduce; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import junit.framework.TestCase; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; +import org.apache.hadoop.hive.metastore.api.Database; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.Partition; +import org.apache.hadoop.hive.metastore.api.SerDeInfo; +import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.ql.io.RCFileInputFormat; +import org.apache.hadoop.hive.ql.io.RCFileOutputFormat; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.OutputCommitter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class TestHCatOutputFormat extends TestCase { + + private static final Logger LOG = LoggerFactory.getLogger(TestHCatOutputFormat.class); + private HiveMetaStoreClient client; + private HiveConf hiveConf; + + private static final String dbName = "hcatOutputFormatTestDB"; + private static final String tblName = "hcatOutputFormatTestTable"; + + @Override + protected void setUp() throws Exception { + super.setUp(); + hiveConf = new HiveConf(this.getClass()); + + try { + client = new HiveMetaStoreClient(hiveConf, null); + + initTable(); + } catch (Throwable e) { + LOG.error("Unable to open the metastore", e); + throw new Exception(e); + } + } + + @Override + protected void tearDown() throws Exception { + try { + super.tearDown(); + client.dropTable(dbName, tblName); + client.dropDatabase(dbName); + + client.close(); + } catch (Throwable e) { + LOG.error("Unable to close metastore", e); + throw new Exception(e); 
+ } + } + + private void initTable() throws Exception { + + try { + client.dropTable(dbName, tblName); + } catch (Exception e) { + } + try { + client.dropDatabase(dbName); + } catch (Exception e) { + } + client.createDatabase(new Database(dbName, "", null, null)); + assertNotNull((client.getDatabase(dbName).getLocationUri())); + + List fields = new ArrayList(); + fields.add(new FieldSchema("colname", serdeConstants.STRING_TYPE_NAME, "")); + + Table tbl = new Table(); + tbl.setDbName(dbName); + tbl.setTableName(tblName); + StorageDescriptor sd = new StorageDescriptor(); + sd.setCols(fields); + tbl.setSd(sd); + + //sd.setLocation("hdfs://tmp"); + sd.setInputFormat(RCFileInputFormat.class.getName()); + sd.setOutputFormat(RCFileOutputFormat.class.getName()); + sd.setParameters(new HashMap()); + sd.getParameters().put("test_param_1", "Use this for comments etc"); + //sd.setBucketCols(new ArrayList(2)); + //sd.getBucketCols().add("name"); + sd.setSerdeInfo(new SerDeInfo()); + sd.getSerdeInfo().setName(tbl.getTableName()); + sd.getSerdeInfo().setParameters(new HashMap()); + sd.getSerdeInfo().getParameters().put(serdeConstants.SERIALIZATION_FORMAT, "1"); + sd.getSerdeInfo().setSerializationLib( + org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.class.getName()); + tbl.setPartitionKeys(fields); + + Map tableParams = new HashMap(); + tableParams.put("hcat.testarg", "testArgValue"); + + tbl.setParameters(tableParams); + + client.createTable(tbl); + Path tblPath = new Path(client.getTable(dbName, tblName).getSd().getLocation()); + assertTrue(tblPath.getFileSystem(hiveConf).mkdirs(new Path(tblPath, "colname=p1"))); + + } + + public void testSetOutput() throws Exception { + Configuration conf = new Configuration(); + Job job = new Job(conf, "test outputformat"); + + Map partitionValues = new HashMap(); + partitionValues.put("colname", "p1"); + //null server url means local mode + OutputJobInfo info = OutputJobInfo.create(dbName, tblName, partitionValues); + + 
HCatOutputFormat.setOutput(job, info); + OutputJobInfo jobInfo = HCatOutputFormat.getJobInfo(job); + + assertNotNull(jobInfo.getTableInfo()); + assertEquals(1, jobInfo.getPartitionValues().size()); + assertEquals("p1", jobInfo.getPartitionValues().get("colname")); + assertEquals(1, jobInfo.getTableInfo().getDataColumns().getFields().size()); + assertEquals("colname", jobInfo.getTableInfo().getDataColumns().getFields().get(0).getName()); + + publishTest(job); + } + + public void publishTest(Job job) throws Exception { + OutputCommitter committer = new FileOutputCommitterContainer(job, null); + committer.commitJob(job); + + Partition part = client.getPartition(dbName, tblName, Arrays.asList("p1")); + assertNotNull(part); + + StorerInfo storer = InternalUtil.extractStorerInfo(part.getSd(), part.getParameters()); + assertEquals(storer.getProperties().get("hcat.testarg"), "testArgValue"); + assertTrue(part.getSd().getLocation().indexOf("p1") != -1); + } +} diff --git hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatPartitionPublish.java hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatPartitionPublish.java new file mode 100644 index 0000000..6341f64 --- /dev/null +++ hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatPartitionPublish.java @@ -0,0 +1,266 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.mapreduce; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Random; + +import junit.framework.Assert; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FileUtil; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; +import org.apache.hadoop.hive.metastore.MetaStoreUtils; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.SerDeInfo; +import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.ql.io.RCFileInputFormat; +import org.apache.hadoop.hive.ql.io.RCFileOutputFormat; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe; +import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.MiniMRCluster; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; +import org.apache.hadoop.util.Shell; +import 
org.apache.hive.hcatalog.NoExitSecurityManager; +import org.apache.hive.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzer; +import org.apache.hive.hcatalog.data.DefaultHCatRecord; +import org.apache.hive.hcatalog.data.HCatRecord; +import org.apache.hive.hcatalog.data.schema.HCatFieldSchema; +import org.apache.hive.hcatalog.data.schema.HCatSchema; +import org.apache.hive.hcatalog.data.schema.HCatSchemaUtils; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; + +public class TestHCatPartitionPublish { + private static Configuration mrConf = null; + private static FileSystem fs = null; + private static MiniMRCluster mrCluster = null; + private static boolean isServerRunning = false; + private static final int msPort = 20101; + private static HiveConf hcatConf; + private static HiveMetaStoreClient msc; + private static SecurityManager securityManager; + + @BeforeClass + public static void setup() throws Exception { + String testDir = System.getProperty("test.data.dir", "./"); + testDir = testDir + "/test_hcat_partitionpublish_" + Math.abs(new Random().nextLong()) + "/"; + File workDir = new File(new File(testDir).getCanonicalPath()); + FileUtil.fullyDelete(workDir); + workDir.mkdirs(); + Configuration conf = new Configuration(true); + conf.set("yarn.scheduler.capacity.root.queues", "default"); + conf.set("yarn.scheduler.capacity.root.default.capacity", "100"); + + fs = FileSystem.get(conf); + System.setProperty("hadoop.log.dir", new File(workDir, "/logs").getAbsolutePath()); + // LocalJobRunner does not work with mapreduce OutputCommitter. So need + // to use MiniMRCluster. 
MAPREDUCE-2350 + mrCluster = new MiniMRCluster(1, fs.getUri().toString(), 1, null, null, + new JobConf(conf)); + mrConf = mrCluster.createJobConf(); + + if (isServerRunning) { + return; + } + + MetaStoreUtils.startMetaStore(msPort, ShimLoader + .getHadoopThriftAuthBridge()); + Thread.sleep(10000); + isServerRunning = true; + securityManager = System.getSecurityManager(); + System.setSecurityManager(new NoExitSecurityManager()); + + hcatConf = new HiveConf(TestHCatPartitionPublish.class); + hcatConf.set("hive.metastore.local", "false"); + hcatConf.setVar(HiveConf.ConfVars.METASTOREURIS, "thrift://localhost:" + + msPort); + hcatConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES, 3); + hcatConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTFAILURERETRIES, 3); + hcatConf.set(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname, + HCatSemanticAnalyzer.class.getName()); + hcatConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); + hcatConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); + hcatConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, + "false"); + msc = new HiveMetaStoreClient(hcatConf, null); + System.setProperty(HiveConf.ConfVars.PREEXECHOOKS.varname, " "); + System.setProperty(HiveConf.ConfVars.POSTEXECHOOKS.varname, " "); + } + + @AfterClass + public static void tearDown() throws IOException { + if (mrCluster != null) { + mrCluster.shutdown(); + } + System.setSecurityManager(securityManager); + isServerRunning = false; + } + + @Test + public void testPartitionPublish() throws Exception { + String dbName = "default"; + String tableName = "testHCatPartitionedTable"; + createTable(null, tableName); + + Map partitionMap = new HashMap(); + partitionMap.put("part1", "p1value1"); + partitionMap.put("part0", "p0value1"); + + ArrayList hcatTableColumns = new ArrayList(); + for (FieldSchema fs : getTableColumns()) { + hcatTableColumns.add(HCatSchemaUtils.getHCatFieldSchema(fs)); + } + + runMRCreateFail(dbName, tableName, partitionMap, 
hcatTableColumns); + List ptns = msc.listPartitionNames(dbName, tableName, + (short) 10); + Assert.assertEquals(0, ptns.size()); + Table table = msc.getTable(dbName, tableName); + Assert.assertTrue(table != null); + // In Windows, we cannot remove the output directory when job fail. See + // FileOutputCommitterContainer.abortJob + if (!Shell.WINDOWS) { + Assert.assertFalse(fs.exists(new Path(table.getSd().getLocation() + + "/part1=p1value1/part0=p0value1"))); + } + } + + void runMRCreateFail( + String dbName, String tableName, Map partitionValues, + List columns) throws Exception { + + Job job = new Job(mrConf, "hcat mapreduce write fail test"); + job.setJarByClass(this.getClass()); + job.setMapperClass(TestHCatPartitionPublish.MapFail.class); + + // input/output settings + job.setInputFormatClass(TextInputFormat.class); + + Path path = new Path(fs.getWorkingDirectory(), + "mapred/testHCatMapReduceInput"); + // The write count does not matter, as the map will fail in its first + // call. 
+ createInputFile(path, 5); + + TextInputFormat.setInputPaths(job, path); + job.setOutputFormatClass(HCatOutputFormat.class); + OutputJobInfo outputJobInfo = OutputJobInfo.create(dbName, tableName, + partitionValues); + HCatOutputFormat.setOutput(job, outputJobInfo); + + job.setMapOutputKeyClass(BytesWritable.class); + job.setMapOutputValueClass(DefaultHCatRecord.class); + + job.setNumReduceTasks(0); + + HCatOutputFormat.setSchema(job, new HCatSchema(columns)); + + boolean success = job.waitForCompletion(true); + Assert.assertTrue(success == false); + } + + private void createInputFile(Path path, int rowCount) throws IOException { + if (fs.exists(path)) { + fs.delete(path, true); + } + FSDataOutputStream os = fs.create(path); + for (int i = 0; i < rowCount; i++) { + os.writeChars(i + "\n"); + } + os.close(); + } + + public static class MapFail extends + Mapper { + + @Override + public void map(LongWritable key, Text value, Context context) + throws IOException, InterruptedException { + { + throw new IOException("Exception to mimic job failure."); + } + } + } + + private void createTable(String dbName, String tableName) throws Exception { + String databaseName = (dbName == null) ? 
MetaStoreUtils.DEFAULT_DATABASE_NAME + : dbName; + try { + msc.dropTable(databaseName, tableName); + } catch (Exception e) { + } // can fail with NoSuchObjectException + + Table tbl = new Table(); + tbl.setDbName(databaseName); + tbl.setTableName(tableName); + tbl.setTableType("MANAGED_TABLE"); + StorageDescriptor sd = new StorageDescriptor(); + sd.setCols(getTableColumns()); + tbl.setPartitionKeys(getPartitionKeys()); + tbl.setSd(sd); + sd.setBucketCols(new ArrayList(2)); + sd.setSerdeInfo(new SerDeInfo()); + sd.getSerdeInfo().setName(tbl.getTableName()); + sd.getSerdeInfo().setParameters(new HashMap()); + sd.getSerdeInfo().getParameters().put(serdeConstants.SERIALIZATION_FORMAT, "1"); + sd.getSerdeInfo().setSerializationLib(ColumnarSerDe.class.getName()); + sd.setInputFormat(RCFileInputFormat.class.getName()); + sd.setOutputFormat(RCFileOutputFormat.class.getName()); + + Map tableParams = new HashMap(); + tbl.setParameters(tableParams); + + msc.createTable(tbl); + } + + protected List getPartitionKeys() { + List fields = new ArrayList(); + // Defining partition names in unsorted order + fields.add(new FieldSchema("PaRT1", serdeConstants.STRING_TYPE_NAME, "")); + fields.add(new FieldSchema("part0", serdeConstants.STRING_TYPE_NAME, "")); + return fields; + } + + protected List getTableColumns() { + List fields = new ArrayList(); + fields.add(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")); + fields.add(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "")); + return fields; + } + +} diff --git hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatPartitioned.java hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatPartitioned.java new file mode 100644 index 0000000..c11af31 --- /dev/null +++ hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestHCatPartitioned.java @@ -0,0 +1,351 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.mapreduce; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hive.hcatalog.common.ErrorType; +import org.apache.hive.hcatalog.common.HCatException; +import org.apache.hive.hcatalog.data.DefaultHCatRecord; +import org.apache.hive.hcatalog.data.HCatRecord; +import org.apache.hive.hcatalog.data.schema.HCatFieldSchema; +import org.apache.hive.hcatalog.data.schema.HCatSchema; +import org.apache.hive.hcatalog.data.schema.HCatSchemaUtils; +import org.junit.BeforeClass; +import org.junit.Test; + +import static junit.framework.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +public class TestHCatPartitioned extends HCatMapReduceTest { + + private static List writeRecords; + private static List partitionColumns; + + @BeforeClass + public static void oneTimeSetUp() throws Exception { + + tableName = "testHCatPartitionedTable"; + writeRecords = new ArrayList(); + + for (int i = 0; i < 20; i++) { + List objList = new ArrayList(); + + objList.add(i); + objList.add("strvalue" + i); + writeRecords.add(new 
DefaultHCatRecord(objList)); + } + + partitionColumns = new ArrayList(); + partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, ""))); + partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, ""))); + } + + + @Override + protected List getPartitionKeys() { + List fields = new ArrayList(); + //Defining partition names in unsorted order + fields.add(new FieldSchema("PaRT1", serdeConstants.STRING_TYPE_NAME, "")); + fields.add(new FieldSchema("part0", serdeConstants.STRING_TYPE_NAME, "")); + return fields; + } + + @Override + protected List getTableColumns() { + List fields = new ArrayList(); + fields.add(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, "")); + fields.add(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, "")); + return fields; + } + + + @Test + public void testHCatPartitionedTable() throws Exception { + + Map partitionMap = new HashMap(); + partitionMap.put("part1", "p1value1"); + partitionMap.put("part0", "p0value1"); + + runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true); + + partitionMap.clear(); + partitionMap.put("PART1", "p1value2"); + partitionMap.put("PART0", "p0value2"); + + runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true); + + //Test for duplicate publish + IOException exc = null; + try { + runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true); + } catch (IOException e) { + exc = e; + } + + assertTrue(exc != null); + assertTrue(exc instanceof HCatException); + assertEquals(ErrorType.ERROR_DUPLICATE_PARTITION, ((HCatException) exc).getErrorType()); + + //Test for publish with invalid partition key name + exc = null; + partitionMap.clear(); + partitionMap.put("px1", "p1value2"); + partitionMap.put("px0", "p0value2"); + + try { + runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true); + } catch (IOException e) { + exc = e; + } + + assertTrue(exc != null); + 
assertTrue(exc instanceof HCatException); + assertEquals(ErrorType.ERROR_MISSING_PARTITION_KEY, ((HCatException) exc).getErrorType()); + + //Test for publish with missing partition key values + exc = null; + partitionMap.clear(); + partitionMap.put("px", "p1value2"); + + try { + runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true); + } catch (IOException e) { + exc = e; + } + + assertTrue(exc != null); + assertTrue(exc instanceof HCatException); + assertEquals(ErrorType.ERROR_INVALID_PARTITION_VALUES, ((HCatException) exc).getErrorType()); + + + //Test for null partition value map + exc = null; + try { + runMRCreate(null, partitionColumns, writeRecords, 20, false); + } catch (IOException e) { + exc = e; + } + + assertTrue(exc == null); +// assertTrue(exc instanceof HCatException); +// assertEquals(ErrorType.ERROR_PUBLISHING_PARTITION, ((HCatException) exc).getErrorType()); + // With Dynamic partitioning, this isn't an error that the keyValues specified didn't values + + //Read should get 10 + 20 rows + runMRRead(30); + + //Read with partition filter + runMRRead(10, "part1 = \"p1value1\""); + runMRRead(20, "part1 = \"p1value2\""); + runMRRead(30, "part1 = \"p1value1\" or part1 = \"p1value2\""); + runMRRead(10, "part0 = \"p0value1\""); + runMRRead(20, "part0 = \"p0value2\""); + runMRRead(30, "part0 = \"p0value1\" or part0 = \"p0value2\""); + + tableSchemaTest(); + columnOrderChangeTest(); + hiveReadTest(); + } + + + //test that new columns gets added to table schema + private void tableSchemaTest() throws Exception { + + HCatSchema tableSchema = getTableSchema(); + + assertEquals(4, tableSchema.getFields().size()); + + //Update partition schema to have 3 fields + partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c3", serdeConstants.STRING_TYPE_NAME, ""))); + + writeRecords = new ArrayList(); + + for (int i = 0; i < 20; i++) { + List objList = new ArrayList(); + + objList.add(i); + objList.add("strvalue" + i); + 
objList.add("str2value" + i); + + writeRecords.add(new DefaultHCatRecord(objList)); + } + + Map partitionMap = new HashMap(); + partitionMap.put("part1", "p1value5"); + partitionMap.put("part0", "p0value5"); + + runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true); + + tableSchema = getTableSchema(); + + //assert that c3 has got added to table schema + assertEquals(5, tableSchema.getFields().size()); + assertEquals("c1", tableSchema.getFields().get(0).getName()); + assertEquals("c2", tableSchema.getFields().get(1).getName()); + assertEquals("c3", tableSchema.getFields().get(2).getName()); + assertEquals("part1", tableSchema.getFields().get(3).getName()); + assertEquals("part0", tableSchema.getFields().get(4).getName()); + + //Test that changing column data type fails + partitionMap.clear(); + partitionMap.put("part1", "p1value6"); + partitionMap.put("part0", "p0value6"); + + partitionColumns = new ArrayList(); + partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, ""))); + partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.INT_TYPE_NAME, ""))); + + IOException exc = null; + try { + runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true); + } catch (IOException e) { + exc = e; + } + + assertTrue(exc != null); + assertTrue(exc instanceof HCatException); + assertEquals(ErrorType.ERROR_SCHEMA_TYPE_MISMATCH, ((HCatException) exc).getErrorType()); + + //Test that partition key is not allowed in data + partitionColumns = new ArrayList(); + partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, ""))); + partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, ""))); + partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c3", serdeConstants.STRING_TYPE_NAME, ""))); + partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new 
FieldSchema("part1", serdeConstants.STRING_TYPE_NAME, ""))); + + List recordsContainingPartitionCols = new ArrayList(20); + for (int i = 0; i < 20; i++) { + List objList = new ArrayList(); + + objList.add(i); + objList.add("c2value" + i); + objList.add("c3value" + i); + objList.add("p1value6"); + + recordsContainingPartitionCols.add(new DefaultHCatRecord(objList)); + } + + exc = null; + try { + runMRCreate(partitionMap, partitionColumns, recordsContainingPartitionCols, 20, true); + } catch (IOException e) { + exc = e; + } + + List records = runMRRead(20, "part1 = \"p1value6\""); + assertEquals(20, records.size()); + records = runMRRead(20, "part0 = \"p0value6\""); + assertEquals(20, records.size()); + Integer i = 0; + for (HCatRecord rec : records) { + assertEquals(5, rec.size()); + assertTrue(rec.get(0).equals(i)); + assertTrue(rec.get(1).equals("c2value" + i)); + assertTrue(rec.get(2).equals("c3value" + i)); + assertTrue(rec.get(3).equals("p1value6")); + assertTrue(rec.get(4).equals("p0value6")); + i++; + } + } + + //check behavior while change the order of columns + private void columnOrderChangeTest() throws Exception { + + HCatSchema tableSchema = getTableSchema(); + + assertEquals(5, tableSchema.getFields().size()); + + partitionColumns = new ArrayList(); + partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, ""))); + partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c3", serdeConstants.STRING_TYPE_NAME, ""))); + partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, ""))); + + + writeRecords = new ArrayList(); + + for (int i = 0; i < 10; i++) { + List objList = new ArrayList(); + + objList.add(i); + objList.add("co strvalue" + i); + objList.add("co str2value" + i); + + writeRecords.add(new DefaultHCatRecord(objList)); + } + + Map partitionMap = new HashMap(); + partitionMap.put("part1", "p1value8"); + partitionMap.put("part0", 
"p0value8"); + + Exception exc = null; + try { + runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true); + } catch (IOException e) { + exc = e; + } + + assertTrue(exc != null); + assertTrue(exc instanceof HCatException); + assertEquals(ErrorType.ERROR_SCHEMA_COLUMN_MISMATCH, ((HCatException) exc).getErrorType()); + + + partitionColumns = new ArrayList(); + partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, ""))); + partitionColumns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, ""))); + + writeRecords = new ArrayList(); + + for (int i = 0; i < 10; i++) { + List objList = new ArrayList(); + + objList.add(i); + objList.add("co strvalue" + i); + + writeRecords.add(new DefaultHCatRecord(objList)); + } + + runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true); + + //Read should get 10 + 20 + 10 + 10 + 20 rows + runMRRead(70); + } + + //Test that data inserted through hcatoutputformat is readable from hive + private void hiveReadTest() throws Exception { + + String query = "select * from " + tableName; + int retCode = driver.run(query).getResponseCode(); + + if (retCode != 0) { + throw new Exception("Error " + retCode + " running query " + query); + } + + ArrayList res = new ArrayList(); + driver.getResults(res); + assertEquals(70, res.size()); + } +} diff --git hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestInputJobInfo.java hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestInputJobInfo.java new file mode 100644 index 0000000..b80bc36 --- /dev/null +++ hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestInputJobInfo.java @@ -0,0 +1,48 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.mapreduce; + +import java.util.Properties; + +import junit.framework.Assert; + +import org.junit.Test; + +public class TestInputJobInfo extends HCatBaseTest { + + @Test + public void test4ArgCreate() throws Exception { + Properties p = new Properties(); + p.setProperty("key", "value"); + InputJobInfo jobInfo = InputJobInfo.create("Db", "Table", "Filter", p); + Assert.assertEquals("Db", jobInfo.getDatabaseName()); + Assert.assertEquals("Table", jobInfo.getTableName()); + Assert.assertEquals("Filter", jobInfo.getFilter()); + Assert.assertEquals("value", jobInfo.getProperties().getProperty("key")); + } + + @Test + public void test3ArgCreate() throws Exception { + InputJobInfo jobInfo = InputJobInfo.create("Db", "Table", "Filter"); + Assert.assertEquals("Db", jobInfo.getDatabaseName()); + Assert.assertEquals("Table", jobInfo.getTableName()); + Assert.assertEquals("Filter", jobInfo.getFilter()); + Assert.assertEquals(0, jobInfo.getProperties().size()); + } +} diff --git hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestMultiOutputFormat.java hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestMultiOutputFormat.java new file mode 100644 index 0000000..0667efa --- /dev/null +++ hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestMultiOutputFormat.java @@ -0,0 +1,334 @@ +/** + * Licensed to the Apache 
Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.mapreduce; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.net.URI; +import java.util.Arrays; +import java.util.List; +import java.util.Random; +import java.util.StringTokenizer; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.filecache.DistributedCache; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FileUtil; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.SequenceFile; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.MiniMRCluster; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.JobContext; +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mapreduce.OutputCommitter; +import org.apache.hadoop.mapreduce.Reducer; +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; +import 
org.apache.hadoop.mapreduce.lib.input.TextInputFormat; +import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; +import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; +import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; +import org.apache.hive.hcatalog.mapreduce.MultiOutputFormat.JobConfigurer; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class TestMultiOutputFormat { + + private static final Logger LOG = LoggerFactory.getLogger(TestMultiOutputFormat.class); + private static File workDir; + private static Configuration mrConf = null; + private static FileSystem fs = null; + private static MiniMRCluster mrCluster = null; + + @BeforeClass + public static void setup() throws IOException { + createWorkDir(); + Configuration conf = new Configuration(true); + conf.set("yarn.scheduler.capacity.root.queues", "default"); + conf.set("yarn.scheduler.capacity.root.default.capacity", "100"); + + fs = FileSystem.get(conf); + System.setProperty("hadoop.log.dir", new File(workDir, "/logs").getAbsolutePath()); + // LocalJobRunner does not work with mapreduce OutputCommitter. So need + // to use MiniMRCluster. 
MAPREDUCE-2350 + mrCluster = new MiniMRCluster(1, fs.getUri().toString(), 1, null, null, + new JobConf(conf)); + mrConf = mrCluster.createJobConf(); + } + + private static void createWorkDir() throws IOException { + String testDir = System.getProperty("test.data.dir", "./"); + testDir = testDir + "/test_multiout_" + Math.abs(new Random().nextLong()) + "/"; + workDir = new File(new File(testDir).getCanonicalPath()); + FileUtil.fullyDelete(workDir); + workDir.mkdirs(); + } + + @AfterClass + public static void tearDown() throws IOException { + if (mrCluster != null) { + mrCluster.shutdown(); + } + FileUtil.fullyDelete(workDir); + } + + /** + * A test job that reads a input file and outputs each word and the index of + * the word encountered to a text file and sequence file with different key + * values. + */ + @Test + public void testMultiOutputFormatWithoutReduce() throws Throwable { + Job job = new Job(mrConf, "MultiOutNoReduce"); + job.setMapperClass(MultiOutWordIndexMapper.class); + job.setJarByClass(this.getClass()); + job.setInputFormatClass(TextInputFormat.class); + job.setOutputFormatClass(MultiOutputFormat.class); + job.setNumReduceTasks(0); + + JobConfigurer configurer = MultiOutputFormat.createConfigurer(job); + configurer.addOutputFormat("out1", TextOutputFormat.class, IntWritable.class, Text.class); + configurer.addOutputFormat("out2", SequenceFileOutputFormat.class, Text.class, + IntWritable.class); + Path outDir = new Path(workDir.getPath(), job.getJobName()); + FileOutputFormat.setOutputPath(configurer.getJob("out1"), new Path(outDir, "out1")); + FileOutputFormat.setOutputPath(configurer.getJob("out2"), new Path(outDir, "out2")); + + String fileContent = "Hello World"; + String inputFile = createInputFile(fileContent); + FileInputFormat.setInputPaths(job, new Path(inputFile)); + + //Test for merging of configs + DistributedCache.addFileToClassPath(new Path(inputFile), job.getConfiguration(), fs); + String dummyFile = createInputFile("dummy file"); + 
DistributedCache.addFileToClassPath(new Path(dummyFile), configurer.getJob("out1") + .getConfiguration(), fs); + // duplicate of the value. Merging should remove duplicates + DistributedCache.addFileToClassPath(new Path(inputFile), configurer.getJob("out2") + .getConfiguration(), fs); + + configurer.configure(); + + // Verify if the configs are merged + Path[] fileClassPaths = DistributedCache.getFileClassPaths(job.getConfiguration()); + List fileClassPathsList = Arrays.asList(fileClassPaths); + Assert.assertTrue(fileClassPathsList.contains(new Path(inputFile))); + Assert.assertTrue(fileClassPathsList.contains(new Path(dummyFile))); + + URI[] cacheFiles = DistributedCache.getCacheFiles(job.getConfiguration()); + List cacheFilesList = Arrays.asList(cacheFiles); + Assert.assertTrue(cacheFilesList.contains(new Path(inputFile).makeQualified(fs).toUri())); + Assert.assertTrue(cacheFilesList.contains(new Path(dummyFile).makeQualified(fs).toUri())); + + Assert.assertTrue(job.waitForCompletion(true)); + + Path textOutPath = new Path(outDir, "out1/part-m-00000"); + String[] textOutput = readFully(textOutPath).split("\n"); + Path seqOutPath = new Path(outDir, "out2/part-m-00000"); + SequenceFile.Reader reader = new SequenceFile.Reader(fs, seqOutPath, mrConf); + Text key = new Text(); + IntWritable value = new IntWritable(); + String[] words = fileContent.split(" "); + Assert.assertEquals(words.length, textOutput.length); + LOG.info("Verifying file contents"); + for (int i = 0; i < words.length; i++) { + Assert.assertEquals((i + 1) + "\t" + words[i], textOutput[i]); + reader.next(key, value); + Assert.assertEquals(words[i], key.toString()); + Assert.assertEquals((i + 1), value.get()); + } + Assert.assertFalse(reader.next(key, value)); + } + + /** + * A word count test job that reads a input file and outputs the count of + * words to a text file and sequence file with different key values. 
+ */ + @Test + public void testMultiOutputFormatWithReduce() throws Throwable { + Job job = new Job(mrConf, "MultiOutWithReduce"); + + job.setMapperClass(WordCountMapper.class); + job.setReducerClass(MultiOutWordCountReducer.class); + job.setJarByClass(this.getClass()); + job.setInputFormatClass(TextInputFormat.class); + job.setOutputFormatClass(MultiOutputFormat.class); + job.setMapOutputKeyClass(Text.class); + job.setMapOutputValueClass(IntWritable.class); + + JobConfigurer configurer = MultiOutputFormat.createConfigurer(job); + + configurer.addOutputFormat("out1", TextOutputFormat.class, IntWritable.class, Text.class); + configurer.addOutputFormat("out2", SequenceFileOutputFormat.class, Text.class, + IntWritable.class); + configurer.addOutputFormat("out3", NullOutputFormat.class, Text.class, + IntWritable.class); + Path outDir = new Path(workDir.getPath(), job.getJobName()); + FileOutputFormat.setOutputPath(configurer.getJob("out1"), new Path(outDir, "out1")); + FileOutputFormat.setOutputPath(configurer.getJob("out2"), new Path(outDir, "out2")); + + configurer.configure(); + + String fileContent = "Hello World Hello World World"; + String inputFile = createInputFile(fileContent); + FileInputFormat.setInputPaths(job, new Path(inputFile)); + + Assert.assertTrue(job.waitForCompletion(true)); + + Path textOutPath = new Path(outDir, "out1/part-r-00000"); + String[] textOutput = readFully(textOutPath).split("\n"); + Path seqOutPath = new Path(outDir, "out2/part-r-00000"); + SequenceFile.Reader reader = new SequenceFile.Reader(fs, seqOutPath, mrConf); + Text key = new Text(); + IntWritable value = new IntWritable(); + String[] words = "Hello World".split(" "); + Assert.assertEquals(words.length, textOutput.length); + for (int i = 0; i < words.length; i++) { + Assert.assertEquals((i + 2) + "\t" + words[i], textOutput[i]); + reader.next(key, value); + Assert.assertEquals(words[i], key.toString()); + Assert.assertEquals((i + 2), value.get()); + } + 
Assert.assertFalse(reader.next(key, value)); + + } + + + /** + * Create a file for map input + * + * @return absolute path of the file. + * @throws IOException if any error encountered + */ + private String createInputFile(String content) throws IOException { + File f = File.createTempFile("input", "txt"); + FileWriter writer = new FileWriter(f); + writer.write(content); + writer.close(); + return f.getAbsolutePath(); + } + + private String readFully(Path file) throws IOException { + FSDataInputStream in = fs.open(file); + byte[] b = new byte[in.available()]; + in.readFully(b); + in.close(); + return new String(b); + } + + private static class MultiOutWordIndexMapper extends + Mapper { + + private IntWritable index = new IntWritable(1); + private Text word = new Text(); + + @Override + protected void map(LongWritable key, Text value, Context context) + throws IOException, InterruptedException { + StringTokenizer itr = new StringTokenizer(value.toString()); + while (itr.hasMoreTokens()) { + word.set(itr.nextToken()); + MultiOutputFormat.write("out1", index, word, context); + MultiOutputFormat.write("out2", word, index, context); + index.set(index.get() + 1); + } + } + } + + private static class WordCountMapper extends + Mapper { + + private final static IntWritable one = new IntWritable(1); + private Text word = new Text(); + + @Override + protected void map(LongWritable key, Text value, Context context) + throws IOException, InterruptedException { + StringTokenizer itr = new StringTokenizer(value.toString()); + while (itr.hasMoreTokens()) { + word.set(itr.nextToken()); + context.write(word, one); + } + } + } + + private static class MultiOutWordCountReducer extends + Reducer { + + private IntWritable count = new IntWritable(); + + @Override + protected void reduce(Text word, Iterable values, Context context) + throws IOException, InterruptedException { + int sum = 0; + for (IntWritable val : values) { + sum += val.get(); + } + count.set(sum); + 
MultiOutputFormat.write("out1", count, word, context); + MultiOutputFormat.write("out2", word, count, context); + MultiOutputFormat.write("out3", word, count, context); + } + } + + private static class NullOutputFormat extends + org.apache.hadoop.mapreduce.lib.output.NullOutputFormat { + + @Override + public OutputCommitter getOutputCommitter(TaskAttemptContext context) { + return new OutputCommitter() { + public void abortTask(TaskAttemptContext taskContext) { + } + + public void cleanupJob(JobContext jobContext) { + } + + public void commitJob(JobContext jobContext) { + } + + public void commitTask(TaskAttemptContext taskContext) { + Assert.fail("needsTaskCommit is false but commitTask was called"); + } + + public boolean needsTaskCommit(TaskAttemptContext taskContext) { + return false; + } + + public void setupJob(JobContext jobContext) { + } + + public void setupTask(TaskAttemptContext taskContext) { + } + }; + } + } + +} diff --git hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestPassProperties.java hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestPassProperties.java new file mode 100644 index 0000000..8d286ab --- /dev/null +++ hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestPassProperties.java @@ -0,0 +1,143 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.mapreduce; + +import static org.junit.Assert.assertTrue; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.cli.CliSessionState; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.MetaStoreUtils; +import org.apache.hadoop.hive.ql.Driver; +import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; +import org.apache.hive.hcatalog.HcatTestUtils; +import org.apache.hive.hcatalog.common.HCatException; +import org.apache.hive.hcatalog.data.DefaultHCatRecord; +import org.apache.hive.hcatalog.data.schema.HCatFieldSchema; +import org.apache.hive.hcatalog.data.schema.HCatSchema; +import org.apache.pig.ExecType; +import org.apache.pig.PigServer; +import org.junit.Test; + +public class TestPassProperties { + private static final String TEST_DATA_DIR = System.getProperty("user.dir") + + "/build/test/data/" + TestSequenceFileReadWrite.class.getCanonicalName(); + private static final String TEST_WAREHOUSE_DIR = TEST_DATA_DIR + "/warehouse"; + private static final String INPUT_FILE_NAME = TEST_DATA_DIR + "/input.data"; + + private static Driver driver; + private static PigServer server; + private static String[] input; 
+ private static HiveConf hiveConf; + + public void Initialize() throws Exception { + hiveConf = new HiveConf(this.getClass()); + hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); + hiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, TEST_WAREHOUSE_DIR); + driver = new Driver(hiveConf); + SessionState.start(new CliSessionState(hiveConf)); + + new File(TEST_WAREHOUSE_DIR).mkdirs(); + + int numRows = 3; + input = new String[numRows]; + for (int i = 0; i < numRows; i++) { + String col1 = "a" + i; + String col2 = "b" + i; + input[i] = i + "," + col1 + "," + col2; + } + HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); + server = new PigServer(ExecType.LOCAL); + } + + @Test + public void testSequenceTableWriteReadMR() throws Exception { + Initialize(); + String createTable = "CREATE TABLE bad_props_table(a0 int, a1 String, a2 String) STORED AS SEQUENCEFILE"; + driver.run("drop table bad_props_table"); + int retCode1 = driver.run(createTable).getResponseCode(); + assertTrue(retCode1 == 0); + + boolean caughtException = false; + try { + Configuration conf = new Configuration(); + conf.set("hive.metastore.uris", "thrift://no.such.machine:10888"); + conf.set("hive.metastore.local", "false"); + Job job = new Job(conf, "Write-hcat-seq-table"); + job.setJarByClass(TestSequenceFileReadWrite.class); + + job.setMapperClass(Map.class); + job.setOutputKeyClass(NullWritable.class); + job.setOutputValueClass(DefaultHCatRecord.class); + job.setInputFormatClass(TextInputFormat.class); + TextInputFormat.setInputPaths(job, INPUT_FILE_NAME); + + HCatOutputFormat.setOutput(job, OutputJobInfo.create( + MetaStoreUtils.DEFAULT_DATABASE_NAME, "bad_props_table", null)); + job.setOutputFormatClass(HCatOutputFormat.class); + HCatOutputFormat.setSchema(job, getSchema()); + job.setNumReduceTasks(0); + 
assertTrue(job.waitForCompletion(true)); + new FileOutputCommitterContainer(job, null).cleanupJob(job); + } catch (Exception e) { + caughtException = true; + assertTrue(e.getMessage().contains( + "Could not connect to meta store using any of the URIs provided")); + } + assertTrue(caughtException); + } + + public static class Map extends Mapper { + + public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { + String[] cols = value.toString().split(","); + DefaultHCatRecord record = new DefaultHCatRecord(3); + record.set(0, Integer.parseInt(cols[0])); + record.set(1, cols[1]); + record.set(2, cols[2]); + context.write(NullWritable.get(), record); + } + } + + private HCatSchema getSchema() throws HCatException { + HCatSchema schema = new HCatSchema(new ArrayList()); + schema.append(new HCatFieldSchema("a0", HCatFieldSchema.Type.INT, + "")); + schema.append(new HCatFieldSchema("a1", + HCatFieldSchema.Type.STRING, "")); + schema.append(new HCatFieldSchema("a2", + HCatFieldSchema.Type.STRING, "")); + return schema; + } + + +} diff --git hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestSequenceFileReadWrite.java hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestSequenceFileReadWrite.java new file mode 100644 index 0000000..4c59f7e --- /dev/null +++ hcatalog/core/src/test/java/org/apache/hive/hcatalog/mapreduce/TestSequenceFileReadWrite.java @@ -0,0 +1,265 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.mapreduce; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Iterator; + +import junit.framework.TestCase; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.cli.CliSessionState; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.MetaStoreUtils; +import org.apache.hadoop.hive.ql.Driver; +import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; +import org.apache.hive.hcatalog.HcatTestUtils; +import org.apache.hive.hcatalog.common.HCatConstants; +import org.apache.hive.hcatalog.common.HCatException; +import org.apache.hive.hcatalog.common.HCatUtil; +import org.apache.hive.hcatalog.data.DefaultHCatRecord; +import org.apache.hive.hcatalog.data.schema.HCatFieldSchema; +import org.apache.hive.hcatalog.data.schema.HCatSchema; +import org.apache.pig.ExecType; +import org.apache.pig.PigServer; +import org.apache.pig.data.Tuple; +import org.junit.Test; + +public class TestSequenceFileReadWrite extends TestCase { + private static final String TEST_DATA_DIR = + "/tmp/build/test/data/" + TestSequenceFileReadWrite.class.getCanonicalName(); + private static 
final String TEST_WAREHOUSE_DIR = TEST_DATA_DIR + "/warehouse"; + private static final String INPUT_FILE_NAME = TEST_DATA_DIR + "/input.data"; + + private static Driver driver; + private static PigServer server; + private static String[] input; + private static HiveConf hiveConf; + + public void Initialize() throws Exception { + hiveConf = new HiveConf(this.getClass()); + hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); + hiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, TEST_WAREHOUSE_DIR); + driver = new Driver(hiveConf); + SessionState.start(new CliSessionState(hiveConf)); + + new File(TEST_WAREHOUSE_DIR).mkdirs(); + + int numRows = 3; + input = new String[numRows]; + for (int i = 0; i < numRows; i++) { + String col1 = "a" + i; + String col2 = "b" + i; + input[i] = i + "," + col1 + "," + col2; + } + HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); + server = new PigServer(ExecType.LOCAL); + } + + @Test + public void testSequenceTableWriteRead() throws Exception { + Initialize(); + String createTable = "CREATE TABLE demo_table(a0 int, a1 String, a2 String) STORED AS SEQUENCEFILE"; + driver.run("drop table demo_table"); + int retCode1 = driver.run(createTable).getResponseCode(); + assertTrue(retCode1 == 0); + + server.setBatchOn(); + server.registerQuery("A = load '" + + INPUT_FILE_NAME + + "' using PigStorage(',') as (a0:int,a1:chararray,a2:chararray);"); + server.registerQuery("store A into 'demo_table' using org.apache.hive.hcatalog.pig.HCatStorer();"); + server.executeBatch(); + + server.registerQuery("B = load 'demo_table' using org.apache.hive.hcatalog.pig.HCatLoader();"); + Iterator XIter = server.openIterator("B"); + int numTuplesRead = 0; + while (XIter.hasNext()) { + Tuple t = XIter.next(); + assertEquals(3, t.size()); + assertEquals(t.get(0).toString(), "" + numTuplesRead); + 
assertEquals(t.get(1).toString(), "a" + numTuplesRead); + assertEquals(t.get(2).toString(), "b" + numTuplesRead); + numTuplesRead++; + } + assertEquals(input.length, numTuplesRead); + } + + @Test + public void testTextTableWriteRead() throws Exception { + Initialize(); + String createTable = "CREATE TABLE demo_table_1(a0 int, a1 String, a2 String) STORED AS TEXTFILE"; + driver.run("drop table demo_table_1"); + int retCode1 = driver.run(createTable).getResponseCode(); + assertTrue(retCode1 == 0); + + server.setBatchOn(); + server.registerQuery("A = load '" + + INPUT_FILE_NAME + + "' using PigStorage(',') as (a0:int,a1:chararray,a2:chararray);"); + server.registerQuery("store A into 'demo_table_1' using org.apache.hive.hcatalog.pig.HCatStorer();"); + server.executeBatch(); + + server.registerQuery("B = load 'demo_table_1' using org.apache.hive.hcatalog.pig.HCatLoader();"); + Iterator XIter = server.openIterator("B"); + int numTuplesRead = 0; + while (XIter.hasNext()) { + Tuple t = XIter.next(); + assertEquals(3, t.size()); + assertEquals(t.get(0).toString(), "" + numTuplesRead); + assertEquals(t.get(1).toString(), "a" + numTuplesRead); + assertEquals(t.get(2).toString(), "b" + numTuplesRead); + numTuplesRead++; + } + assertEquals(input.length, numTuplesRead); + } + + @Test + public void testSequenceTableWriteReadMR() throws Exception { + Initialize(); + String createTable = "CREATE TABLE demo_table_2(a0 int, a1 String, a2 String) STORED AS SEQUENCEFILE"; + driver.run("drop table demo_table_2"); + int retCode1 = driver.run(createTable).getResponseCode(); + assertTrue(retCode1 == 0); + + Configuration conf = new Configuration(); + conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, + HCatUtil.serialize(hiveConf.getAllProperties())); + Job job = new Job(conf, "Write-hcat-seq-table"); + job.setJarByClass(TestSequenceFileReadWrite.class); + + job.setMapperClass(Map.class); + job.setOutputKeyClass(NullWritable.class); + job.setOutputValueClass(DefaultHCatRecord.class); + 
job.setInputFormatClass(TextInputFormat.class); + TextInputFormat.setInputPaths(job, INPUT_FILE_NAME); + + HCatOutputFormat.setOutput(job, OutputJobInfo.create( + MetaStoreUtils.DEFAULT_DATABASE_NAME, "demo_table_2", null)); + job.setOutputFormatClass(HCatOutputFormat.class); + HCatOutputFormat.setSchema(job, getSchema()); + job.setNumReduceTasks(0); + assertTrue(job.waitForCompletion(true)); + if (!HCatUtil.isHadoop23()) { + new FileOutputCommitterContainer(job, null).commitJob(job); + } + assertTrue(job.isSuccessful()); + + server.setBatchOn(); + server.registerQuery("C = load 'default.demo_table_2' using org.apache.hive.hcatalog.pig.HCatLoader();"); + server.executeBatch(); + Iterator XIter = server.openIterator("C"); + int numTuplesRead = 0; + while (XIter.hasNext()) { + Tuple t = XIter.next(); + assertEquals(3, t.size()); + assertEquals(t.get(0).toString(), "" + numTuplesRead); + assertEquals(t.get(1).toString(), "a" + numTuplesRead); + assertEquals(t.get(2).toString(), "b" + numTuplesRead); + numTuplesRead++; + } + assertEquals(input.length, numTuplesRead); + } + + @Test + public void testTextTableWriteReadMR() throws Exception { + Initialize(); + String createTable = "CREATE TABLE demo_table_3(a0 int, a1 String, a2 String) STORED AS TEXTFILE"; + driver.run("drop table demo_table_3"); + int retCode1 = driver.run(createTable).getResponseCode(); + assertTrue(retCode1 == 0); + + Configuration conf = new Configuration(); + conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, + HCatUtil.serialize(hiveConf.getAllProperties())); + Job job = new Job(conf, "Write-hcat-text-table"); + job.setJarByClass(TestSequenceFileReadWrite.class); + + job.setMapperClass(Map.class); + job.setOutputKeyClass(NullWritable.class); + job.setOutputValueClass(DefaultHCatRecord.class); + job.setInputFormatClass(TextInputFormat.class); + job.setNumReduceTasks(0); + TextInputFormat.setInputPaths(job, INPUT_FILE_NAME); + + HCatOutputFormat.setOutput(job, OutputJobInfo.create( + 
MetaStoreUtils.DEFAULT_DATABASE_NAME, "demo_table_3", null)); + job.setOutputFormatClass(HCatOutputFormat.class); + HCatOutputFormat.setSchema(job, getSchema()); + assertTrue(job.waitForCompletion(true)); + if (!HCatUtil.isHadoop23()) { + new FileOutputCommitterContainer(job, null).commitJob(job); + } + assertTrue(job.isSuccessful()); + + server.setBatchOn(); + server.registerQuery("D = load 'default.demo_table_3' using org.apache.hive.hcatalog.pig.HCatLoader();"); + server.executeBatch(); + Iterator XIter = server.openIterator("D"); + int numTuplesRead = 0; + while (XIter.hasNext()) { + Tuple t = XIter.next(); + assertEquals(3, t.size()); + assertEquals(t.get(0).toString(), "" + numTuplesRead); + assertEquals(t.get(1).toString(), "a" + numTuplesRead); + assertEquals(t.get(2).toString(), "b" + numTuplesRead); + numTuplesRead++; + } + assertEquals(input.length, numTuplesRead); + } + + + public static class Map extends Mapper { + + public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { + String[] cols = value.toString().split(","); + DefaultHCatRecord record = new DefaultHCatRecord(3); + record.set(0, Integer.parseInt(cols[0])); + record.set(1, cols[1]); + record.set(2, cols[2]); + context.write(NullWritable.get(), record); + } + } + + private HCatSchema getSchema() throws HCatException { + HCatSchema schema = new HCatSchema(new ArrayList()); + schema.append(new HCatFieldSchema("a0", HCatFieldSchema.Type.INT, + "")); + schema.append(new HCatFieldSchema("a1", + HCatFieldSchema.Type.STRING, "")); + schema.append(new HCatFieldSchema("a2", + HCatFieldSchema.Type.STRING, "")); + return schema; + } + +} diff --git hcatalog/core/src/test/java/org/apache/hive/hcatalog/rcfile/TestRCFileInputStorageDriver.java.broken hcatalog/core/src/test/java/org/apache/hive/hcatalog/rcfile/TestRCFileInputStorageDriver.java.broken new file mode 100644 index 0000000..3942b07 --- /dev/null +++ 
hcatalog/core/src/test/java/org/apache/hive/hcatalog/rcfile/TestRCFileInputStorageDriver.java.broken @@ -0,0 +1,294 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hcatalog.rcfile; + +import java.io.IOException; +import java.io.UnsupportedEncodingException; +import java.util.*; + +import junit.framework.Assert; +import junit.framework.TestCase; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.ql.io.RCFile; +import org.apache.hadoop.hive.ql.io.RCFileOutputFormat; +import org.apache.hadoop.hive.serde.Constants; +import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; +import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable; +import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable; +import org.apache.hadoop.io.compress.DefaultCodec; +import org.apache.hadoop.mapreduce.InputFormat; +import org.apache.hadoop.mapreduce.InputSplit; +import org.apache.hadoop.mapreduce.JobContext; +import org.apache.hadoop.mapreduce.JobID; +import org.apache.hadoop.mapreduce.RecordReader; +import 
org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.hadoop.mapreduce.TaskAttemptID; +import org.apache.hcatalog.common.HCatException; +import org.apache.hcatalog.common.HCatUtil; +import org.apache.hcatalog.data.DefaultHCatRecord; +import org.apache.hcatalog.data.HCatDataCheckUtil; +import org.apache.hcatalog.data.HCatRecord; +import org.apache.hcatalog.data.schema.HCatSchema; +import org.apache.hcatalog.rcfile.RCFileInputDriver; +import org.apache.hcatalog.shims.HCatHadoopShims; + + +public class TestRCFileInputStorageDriver extends TestCase{ + private static final Configuration conf = new Configuration(); + private static final Path dir = new Path(System.getProperty("test.data.dir", ".") + "/mapred"); + private static final Path file = new Path(dir, "test_rcfile"); + private final HCatHadoopShims shim = HCatHadoopShims.Instance.get(); + + // Generate sample records to compare against + private byte[][][] getRecords() throws UnsupportedEncodingException { + byte[][] record_1 = {"123".getBytes("UTF-8"), "456".getBytes("UTF-8"), + "789".getBytes("UTF-8"), "1000".getBytes("UTF-8"), + "5.3".getBytes("UTF-8"), "hcatalog and hadoop".getBytes("UTF-8"), + new byte[0], "\\N".getBytes("UTF-8")}; + byte[][] record_2 = {"100".getBytes("UTF-8"), "200".getBytes("UTF-8"), + "123".getBytes("UTF-8"), "1000".getBytes("UTF-8"), + "5.3".getBytes("UTF-8"), "hcatalog and hadoop".getBytes("UTF-8"), + new byte[0], "\\N".getBytes("UTF-8")}; + return new byte[][][]{record_1, record_2}; + } + + // Write sample records to file for individual tests + private BytesRefArrayWritable[] initTestEnvironment() throws IOException { + FileSystem fs = FileSystem.getLocal(conf); + fs.delete(file, true); + + byte [][][] records = getRecords(); + RCFileOutputFormat.setColumnNumber(conf, 8); + RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null, new DefaultCodec()); + + BytesRefArrayWritable bytes = writeBytesToFile(records[0], writer); + BytesRefArrayWritable bytes2 = 
writeBytesToFile(records[1], writer); + + writer.close(); + return new BytesRefArrayWritable[]{bytes,bytes2}; + } + + private BytesRefArrayWritable writeBytesToFile(byte[][] record, RCFile.Writer writer) throws IOException { + BytesRefArrayWritable bytes = new BytesRefArrayWritable(record.length); + for (int i = 0; i < record.length; i++) { + BytesRefWritable cu = new BytesRefWritable(record[i], 0, record[i].length); + bytes.set(i, cu); + } + writer.append(bytes); + return bytes; + } + + public void testConvertValueToTuple() throws IOException,InterruptedException{ + BytesRefArrayWritable[] bytesArr = initTestEnvironment(); + + HCatSchema schema = buildHiveSchema(); + RCFileInputDriver sd = new RCFileInputDriver(); + JobContext jc = shim.createJobContext(conf, new JobID()); + sd.setInputPath(jc, file.toString()); + InputFormat iF = sd.getInputFormat(null); + InputSplit split = iF.getSplits(jc).get(0); + sd.setOriginalSchema(jc, schema); + sd.setOutputSchema(jc, schema); + sd.initialize(jc, getProps()); + + TaskAttemptContext tac = shim.createTaskAttemptContext(conf, new TaskAttemptID()); + RecordReader rr = iF.createRecordReader(split,tac); + rr.initialize(split, tac); + HCatRecord[] tuples = getExpectedRecords(); + for(int j=0; j < 2; j++){ + Assert.assertTrue(rr.nextKeyValue()); + BytesRefArrayWritable w = (BytesRefArrayWritable)rr.getCurrentValue(); + Assert.assertEquals(bytesArr[j], w); + HCatRecord t = sd.convertToHCatRecord(null,w); + Assert.assertEquals(8, t.size()); + Assert.assertTrue(HCatDataCheckUtil.recordsEqual(t,tuples[j])); + } + } + + public void testPruning() throws IOException,InterruptedException{ + BytesRefArrayWritable[] bytesArr = initTestEnvironment(); + + RCFileInputDriver sd = new RCFileInputDriver(); + JobContext jc = shim.createJobContext(conf, new JobID()); + sd.setInputPath(jc, file.toString()); + InputFormat iF = sd.getInputFormat(null); + InputSplit split = iF.getSplits(jc).get(0); + sd.setOriginalSchema(jc, buildHiveSchema()); + 
sd.setOutputSchema(jc, buildPrunedSchema()); + + sd.initialize(jc, getProps()); + conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR,jc.getConfiguration().get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR)); + TaskAttemptContext tac = shim.createTaskAttemptContext(conf, new TaskAttemptID()); + RecordReader rr = iF.createRecordReader(split,tac); + rr.initialize(split, tac); + HCatRecord[] tuples = getPrunedRecords(); + for(int j=0; j < 2; j++){ + Assert.assertTrue(rr.nextKeyValue()); + BytesRefArrayWritable w = (BytesRefArrayWritable)rr.getCurrentValue(); + Assert.assertFalse(bytesArr[j].equals(w)); + Assert.assertEquals(w.size(), 8); + HCatRecord t = sd.convertToHCatRecord(null,w); + Assert.assertEquals(5, t.size()); + Assert.assertTrue(HCatDataCheckUtil.recordsEqual(t,tuples[j])); + } + assertFalse(rr.nextKeyValue()); + } + + public void testReorderdCols() throws IOException,InterruptedException{ + BytesRefArrayWritable[] bytesArr = initTestEnvironment(); + + RCFileInputDriver sd = new RCFileInputDriver(); + JobContext jc = shim.createJobContext(conf, new JobID()); + sd.setInputPath(jc, file.toString()); + InputFormat iF = sd.getInputFormat(null); + InputSplit split = iF.getSplits(jc).get(0); + sd.setOriginalSchema(jc, buildHiveSchema()); + sd.setOutputSchema(jc, buildReorderedSchema()); + + sd.initialize(jc, getProps()); + Map map = new HashMap(1); + map.put("part1", "first-part"); + sd.setPartitionValues(jc, map); + conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR,jc.getConfiguration().get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR)); + TaskAttemptContext tac = shim.createTaskAttemptContext(conf, new TaskAttemptID()); + RecordReader rr = iF.createRecordReader(split,tac); + rr.initialize(split, tac); + HCatRecord[] tuples = getReorderedCols(); + for(int j=0; j < 2; j++){ + Assert.assertTrue(rr.nextKeyValue()); + BytesRefArrayWritable w = (BytesRefArrayWritable)rr.getCurrentValue(); + Assert.assertFalse(bytesArr[j].equals(w)); + 
Assert.assertEquals(w.size(), 8); + HCatRecord t = sd.convertToHCatRecord(null,w); + Assert.assertEquals(7, t.size()); + Assert.assertTrue(HCatDataCheckUtil.recordsEqual(t,tuples[j])); + } + assertFalse(rr.nextKeyValue()); + } + private HCatRecord[] getExpectedRecords(){ + List rec_1 = new ArrayList(8); + Collections.addAll(rec_1, new Byte("123"), + new Short("456"), + new Integer(789), + new Long(1000L), + new Double(5.3D), + new String("hcatalog and hadoop"), + null, + null); + + HCatRecord tup_1 = new DefaultHCatRecord(rec_1); + + List rec_2 = new ArrayList(8); + Collections.addAll(rec_2, new Byte("100"), + new Short("200"), + new Integer(123), + new Long(1000L), + new Double(5.3D), + new String("hcatalog and hadoop"), + null, + null); + HCatRecord tup_2 = new DefaultHCatRecord(rec_2); + + return new HCatRecord[]{tup_1,tup_2}; + } + + private HCatRecord[] getPrunedRecords(){ + List rec_1 = new ArrayList(8); + Collections.addAll(rec_1, new Byte("123"), + new Integer(789), + new Double(5.3D), + new String("hcatalog and hadoop"), + null); + HCatRecord tup_1 = new DefaultHCatRecord(rec_1); + + List rec_2 = new ArrayList(8); + Collections.addAll(rec_2, new Byte("100"), + new Integer(123), + new Double(5.3D), + new String("hcatalog and hadoop"), + null); + HCatRecord tup_2 = new DefaultHCatRecord(rec_2); + + return new HCatRecord[]{tup_1,tup_2}; + } + + private HCatSchema buildHiveSchema() throws HCatException{ + return new HCatSchema(HCatUtil.getHCatFieldSchemaList(new FieldSchema("atinyint", "tinyint", ""), + new FieldSchema("asmallint", "smallint", ""), + new FieldSchema("aint", "int", ""), + new FieldSchema("along", "bigint", ""), + new FieldSchema("adouble", "double", ""), + new FieldSchema("astring", "string", ""), + new FieldSchema("anullint", "int", ""), + new FieldSchema("anullstring", "string", ""))); + } + + private HCatSchema buildPrunedSchema() throws HCatException{ + return new HCatSchema(HCatUtil.getHCatFieldSchemaList(new FieldSchema("atinyint", 
"tinyint", ""), + new FieldSchema("aint", "int", ""), + new FieldSchema("adouble", "double", ""), + new FieldSchema("astring", "string", ""), + new FieldSchema("anullint", "int", ""))); + } + + private HCatSchema buildReorderedSchema() throws HCatException{ + return new HCatSchema(HCatUtil.getHCatFieldSchemaList(new FieldSchema("aint", "int", ""), + new FieldSchema("part1", "string", ""), + new FieldSchema("adouble", "double", ""), + new FieldSchema("newCol", "tinyint", ""), + new FieldSchema("astring", "string", ""), + new FieldSchema("atinyint", "tinyint", ""), + new FieldSchema("anullint", "int", ""))); + } + + private HCatRecord[] getReorderedCols(){ + List rec_1 = new ArrayList(7); + Collections.addAll(rec_1, new Integer(789), + new String("first-part"), + new Double(5.3D), + null, // new column + new String("hcatalog and hadoop"), + new Byte("123"), + null); + HCatRecord tup_1 = new DefaultHCatRecord(rec_1); + + List rec_2 = new ArrayList(7); + Collections.addAll(rec_2, new Integer(123), + new String("first-part"), + new Double(5.3D), + null, + new String("hcatalog and hadoop"), + new Byte("100"), + null); + HCatRecord tup_2 = new DefaultHCatRecord(rec_2); + + return new HCatRecord[]{tup_1,tup_2}; + + } + private Properties getProps(){ + Properties props = new Properties(); + props.setProperty(Constants.SERIALIZATION_NULL_FORMAT, "\\N"); + props.setProperty(Constants.SERIALIZATION_FORMAT, "9"); + return props; + } +} diff --git hcatalog/core/src/test/java/org/apache/hive/hcatalog/rcfile/TestRCFileMapReduceInputFormat.java hcatalog/core/src/test/java/org/apache/hive/hcatalog/rcfile/TestRCFileMapReduceInputFormat.java new file mode 100644 index 0000000..edc2e4e --- /dev/null +++ hcatalog/core/src/test/java/org/apache/hive/hcatalog/rcfile/TestRCFileMapReduceInputFormat.java @@ -0,0 +1,249 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.rcfile; + +import java.io.IOException; +import java.io.UnsupportedEncodingException; +import java.util.List; +import java.util.Properties; + +import junit.framework.TestCase; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.io.RCFile; +import org.apache.hadoop.hive.ql.io.RCFileOutputFormat; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable; +import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable; +import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe; +import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.compress.CompressionCodec; +import org.apache.hadoop.mapreduce.InputSplit; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.JobContext; +import org.apache.hadoop.mapreduce.RecordReader; +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.hadoop.mapreduce.TaskAttemptID; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * TestRCFile. 
+ * + */ +public class TestRCFileMapReduceInputFormat extends TestCase { + + private static final Logger LOG = LoggerFactory.getLogger(TestRCFileMapReduceInputFormat.class); + + private static Configuration conf = new Configuration(); + + private static ColumnarSerDe serDe; + + private static Path file; + + private static FileSystem fs; + + private static Properties tbl; + + static { + try { + fs = FileSystem.getLocal(conf); + Path dir = new Path(System.getProperty("test.data.dir", ".") + "/mapred"); + file = new Path(dir, "test_rcfile"); + fs.delete(dir, true); + // the SerDe part is from TestLazySimpleSerDe + serDe = new ColumnarSerDe(); + // Create the SerDe + tbl = createProperties(); + serDe.initialize(conf, tbl); + } catch (Exception e) { + } + } + + private static BytesRefArrayWritable patialS = new BytesRefArrayWritable(); + + private static byte[][] bytesArray = null; + + private static BytesRefArrayWritable s = null; + + static { + try { + bytesArray = new byte[][]{"123".getBytes("UTF-8"), + "456".getBytes("UTF-8"), "789".getBytes("UTF-8"), + "1000".getBytes("UTF-8"), "5.3".getBytes("UTF-8"), + "hive and hadoop".getBytes("UTF-8"), new byte[0], + "NULL".getBytes("UTF-8")}; + s = new BytesRefArrayWritable(bytesArray.length); + s.set(0, new BytesRefWritable("123".getBytes("UTF-8"))); + s.set(1, new BytesRefWritable("456".getBytes("UTF-8"))); + s.set(2, new BytesRefWritable("789".getBytes("UTF-8"))); + s.set(3, new BytesRefWritable("1000".getBytes("UTF-8"))); + s.set(4, new BytesRefWritable("5.3".getBytes("UTF-8"))); + s.set(5, new BytesRefWritable("hive and hadoop".getBytes("UTF-8"))); + s.set(6, new BytesRefWritable("NULL".getBytes("UTF-8"))); + s.set(7, new BytesRefWritable("NULL".getBytes("UTF-8"))); + + // partial test init + patialS.set(0, new BytesRefWritable("NULL".getBytes("UTF-8"))); + patialS.set(1, new BytesRefWritable("NULL".getBytes("UTF-8"))); + patialS.set(2, new BytesRefWritable("789".getBytes("UTF-8"))); + patialS.set(3, new 
BytesRefWritable("1000".getBytes("UTF-8"))); + patialS.set(4, new BytesRefWritable("NULL".getBytes("UTF-8"))); + patialS.set(5, new BytesRefWritable("NULL".getBytes("UTF-8"))); + patialS.set(6, new BytesRefWritable("NULL".getBytes("UTF-8"))); + patialS.set(7, new BytesRefWritable("NULL".getBytes("UTF-8"))); + + } catch (UnsupportedEncodingException e) { + } + } + + + /** For debugging and testing. */ + public static void main(String[] args) throws Exception { + int count = 10000; + boolean create = true; + + String usage = "Usage: RCFile " + "[-count N]" + " file"; + if (args.length == 0) { + LOG.error(usage); + System.exit(-1); + } + + try { + for (int i = 0; i < args.length; ++i) { // parse command line + if (args[i] == null) { + continue; + } else if (args[i].equals("-count")) { + count = Integer.parseInt(args[++i]); + } else { + // file is required parameter + file = new Path(args[i]); + } + } + + if (file == null) { + LOG.error(usage); + System.exit(-1); + } + + LOG.info("count = {}", count); + LOG.info("create = {}", create); + LOG.info("file = {}", file); + + // test.performanceTest(); + LOG.info("Finished."); + } finally { + fs.close(); + } + } + + private static Properties createProperties() { + Properties tbl = new Properties(); + + // Set the configuration parameters + tbl.setProperty(serdeConstants.SERIALIZATION_FORMAT, "9"); + tbl.setProperty("columns", + "abyte,ashort,aint,along,adouble,astring,anullint,anullstring"); + tbl.setProperty("columns.types", + "tinyint:smallint:int:bigint:double:string:int:string"); + tbl.setProperty(serdeConstants.SERIALIZATION_NULL_FORMAT, "NULL"); + return tbl; + } + + + public void testSynAndSplit() throws IOException, InterruptedException { + splitBeforeSync(); + splitRightBeforeSync(); + splitInMiddleOfSync(); + splitRightAfterSync(); + splitAfterSync(); + } + + private void splitBeforeSync() throws IOException, InterruptedException { + writeThenReadByRecordReader(600, 1000, 2, 17684, null); + } + + private void 
splitRightBeforeSync() throws IOException, InterruptedException { + writeThenReadByRecordReader(500, 1000, 2, 17750, null); + } + + private void splitInMiddleOfSync() throws IOException, InterruptedException { + writeThenReadByRecordReader(500, 1000, 2, 17760, null); + + } + + private void splitRightAfterSync() throws IOException, InterruptedException { + writeThenReadByRecordReader(500, 1000, 2, 17770, null); + } + + private void splitAfterSync() throws IOException, InterruptedException { + writeThenReadByRecordReader(500, 1000, 2, 19950, null); + } + + private void writeThenReadByRecordReader(int intervalRecordCount, + int writeCount, int splitNumber, long maxSplitSize, CompressionCodec codec) + throws IOException, InterruptedException { + Path testDir = new Path(System.getProperty("test.data.dir", ".") + + "/mapred/testsmallfirstsplit"); + Path testFile = new Path(testDir, "test_rcfile"); + fs.delete(testFile, true); + Configuration cloneConf = new Configuration(conf); + RCFileOutputFormat.setColumnNumber(cloneConf, bytesArray.length); + cloneConf.setInt(RCFile.RECORD_INTERVAL_CONF_STR, intervalRecordCount); + + RCFile.Writer writer = new RCFile.Writer(fs, cloneConf, testFile, null, codec); + + BytesRefArrayWritable bytes = new BytesRefArrayWritable(bytesArray.length); + for (int i = 0; i < bytesArray.length; i++) { + BytesRefWritable cu = null; + cu = new BytesRefWritable(bytesArray[i], 0, bytesArray[i].length); + bytes.set(i, cu); + } + for (int i = 0; i < writeCount; i++) { + writer.append(bytes); + } + writer.close(); + + RCFileMapReduceInputFormat inputFormat = new RCFileMapReduceInputFormat(); + Configuration jonconf = new Configuration(cloneConf); + jonconf.set("mapred.input.dir", testDir.toString()); + JobContext context = new Job(jonconf); + context.getConfiguration().setLong("mapred.max.split.size", maxSplitSize); + List splits = inputFormat.getSplits(context); + assertEquals("splits length should be " + splitNumber, splits.size(), splitNumber); + int 
readCount = 0; + for (int i = 0; i < splits.size(); i++) { + TaskAttemptContext tac = ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(jonconf, + new TaskAttemptID()); + RecordReader rr = inputFormat.createRecordReader(splits.get(i), tac); + rr.initialize(splits.get(i), tac); + while (rr.nextKeyValue()) { + readCount++; + } + } + assertEquals("readCount should be equal to writeCount", readCount, writeCount); + } + +} + + diff --git hcatalog/core/src/test/java/org/apache/hive/hcatalog/rcfile/TestRCFileOutputStorageDriver.java.broken hcatalog/core/src/test/java/org/apache/hive/hcatalog/rcfile/TestRCFileOutputStorageDriver.java.broken new file mode 100644 index 0000000..90458cb --- /dev/null +++ hcatalog/core/src/test/java/org/apache/hive/hcatalog/rcfile/TestRCFileOutputStorageDriver.java.broken @@ -0,0 +1,105 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hcatalog.rcfile; + +import java.io.IOException; +import java.io.UnsupportedEncodingException; +import java.util.ArrayList; +import java.util.List; +import java.util.Properties; + +import junit.framework.TestCase; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable; +import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable; +import org.apache.hadoop.mapreduce.JobContext; +import org.apache.hadoop.mapreduce.JobID; +import org.apache.hcatalog.common.HCatConstants; +import org.apache.hcatalog.common.HCatException; +import org.apache.hcatalog.common.HCatUtil; +import org.apache.hcatalog.data.HCatRecord; +import org.apache.hcatalog.data.schema.HCatSchema; +import org.apache.hcatalog.mapreduce.HCatInputStorageDriver; +import org.apache.hcatalog.mapreduce.HCatOutputStorageDriver; +import org.apache.hcatalog.mapreduce.OutputJobInfo; +import org.apache.hcatalog.shims.HCatHadoopShims; + +public class TestRCFileOutputStorageDriver extends TestCase { + + public void testConversion() throws IOException { + Configuration conf = new Configuration(); + JobContext jc = HCatHadoopShims.Instance.get().createJobContext(conf, new JobID()); + String jobString = HCatUtil.serialize(OutputJobInfo.create(null,null,null)); + jc.getConfiguration().set(HCatConstants.HCAT_KEY_OUTPUT_INFO,jobString); + + HCatSchema schema = buildHiveSchema(); + HCatInputStorageDriver isd = new RCFileInputDriver(); + + isd.setOriginalSchema(jc, schema); + isd.setOutputSchema(jc, schema); + isd.initialize(jc, new Properties()); + + byte[][] byteArray = buildBytesArray(); + + BytesRefArrayWritable bytesWritable = new BytesRefArrayWritable(byteArray.length); + for (int i = 0; i < byteArray.length; i++) { + BytesRefWritable cu = new BytesRefWritable(byteArray[i], 0, byteArray[i].length); + bytesWritable.set(i, cu); + } + + //Convert byte array to HCatRecord using 
isd, convert hcatrecord back to byte array + //using osd, compare the two arrays + HCatRecord record = isd.convertToHCatRecord(null, bytesWritable); + + HCatOutputStorageDriver osd = new RCFileOutputDriver(); + + osd.setSchema(jc, schema); + osd.initialize(jc, new Properties()); + + BytesRefArrayWritable bytesWritableOutput = (BytesRefArrayWritable) osd.convertValue(record); + + assertTrue(bytesWritableOutput.compareTo(bytesWritable) == 0); + } + + private byte[][] buildBytesArray() throws UnsupportedEncodingException { + byte[][] bytes = {"123".getBytes("UTF-8"), "456".getBytes("UTF-8"), + "789".getBytes("UTF-8"), "1000".getBytes("UTF-8"), + "5.3".getBytes("UTF-8"), "hcat and hadoop".getBytes("UTF-8"), + new byte[0], "\\N".getBytes("UTF-8") }; + return bytes; + } + + private HCatSchema buildHiveSchema() throws HCatException{ + + List fields = new ArrayList(8); + fields.add(new FieldSchema("atinyint", "tinyint", "")); + fields.add(new FieldSchema("asmallint", "smallint", "")); + fields.add(new FieldSchema("aint", "int", "")); + fields.add(new FieldSchema("along", "bigint", "")); + fields.add(new FieldSchema("adouble", "double", "")); + fields.add(new FieldSchema("astring", "string", "")); + fields.add(new FieldSchema("anullint", "int", "")); + fields.add(new FieldSchema("anullstring", "string", "")); + + return new HCatSchema(HCatUtil.getHCatFieldSchemaList(fields)); + } +} diff --git hcatalog/core/src/test/java/org/apache/hive/hcatalog/security/TestHdfsAuthorizationProvider.java hcatalog/core/src/test/java/org/apache/hive/hcatalog/security/TestHdfsAuthorizationProvider.java new file mode 100644 index 0000000..41dbddd --- /dev/null +++ hcatalog/core/src/test/java/org/apache/hive/hcatalog/security/TestHdfsAuthorizationProvider.java @@ -0,0 +1,583 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.security; + +import static org.apache.hive.hcatalog.HcatTestUtils.perm300; +import static org.apache.hive.hcatalog.HcatTestUtils.perm500; +import static org.apache.hive.hcatalog.HcatTestUtils.perm555; +import static org.apache.hive.hcatalog.HcatTestUtils.perm700; +import static org.apache.hive.hcatalog.HcatTestUtils.perm755; + +import java.io.IOException; +import java.util.Random; + +import junit.framework.Assert; + +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.cli.CliSessionState; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; +import org.apache.hadoop.hive.metastore.Warehouse; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.ql.metadata.Hive; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; +import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.security.UserGroupInformation; +import 
org.apache.hive.hcatalog.HcatTestUtils; +import org.apache.hive.hcatalog.cli.HCatDriver; +import org.apache.hive.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzer; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +public class TestHdfsAuthorizationProvider { + + protected HCatDriver hcatDriver; + protected HiveMetaStoreClient msc; + protected HiveConf conf; + protected String whDir; + protected Path whPath; + protected FileSystem whFs; + protected Warehouse wh; + protected Hive hive; + + @Before + public void setUp() throws Exception { + + conf = new HiveConf(this.getClass()); + conf.set(ConfVars.PREEXECHOOKS.varname, ""); + conf.set(ConfVars.POSTEXECHOOKS.varname, ""); + conf.set(ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); + + conf.set("hive.metastore.local", "true"); + conf.set(ConfVars.SEMANTIC_ANALYZER_HOOK.varname, HCatSemanticAnalyzer.class.getName()); + conf.setBoolVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED, true); + conf.setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER, + StorageDelegationAuthorizationProvider.class.getCanonicalName()); + conf.set("fs.pfile.impl", "org.apache.hadoop.fs.ProxyLocalFileSystem"); + + whDir = System.getProperty("test.warehouse.dir", "/tmp/testhdfsauthorization_wh"); + conf.setVar(HiveConf.ConfVars.METASTOREWAREHOUSE, whDir); + + UserGroupInformation ugi = ShimLoader.getHadoopShims().getUGIForConf(conf); + String username = ShimLoader.getHadoopShims().getShortUserName(ugi); + + whPath = new Path(whDir); + whFs = whPath.getFileSystem(conf); + + wh = new Warehouse(conf); + hive = Hive.get(conf); + + //clean up mess in HMS + HcatTestUtils.cleanupHMS(hive, wh, perm700); + + whFs.delete(whPath, true); + whFs.mkdirs(whPath, perm755); + + SessionState.start(new CliSessionState(conf)); + hcatDriver = new HCatDriver(); + } + + @After + public void tearDown() throws IOException { + whFs.close(); + hcatDriver.close(); + Hive.closeCurrent(); + } + + public Path getDbPath(String dbName) throws 
MetaException, HiveException { + return HcatTestUtils.getDbPath(hive, wh, dbName); + } + + public Path getTablePath(String dbName, String tableName) throws HiveException { + Table table = hive.getTable(dbName, tableName); + return table.getPath(); + } + + public Path getPartPath(String partName, String dbName, String tableName) throws HiveException { + return new Path(getTablePath(dbName, tableName), partName); + } + + /** Execute the query expecting success*/ + public void exec(String format, Object... args) throws Exception { + String command = String.format(format, args); + CommandProcessorResponse resp = hcatDriver.run(command); + Assert.assertEquals(resp.getErrorMessage(), 0, resp.getResponseCode()); + Assert.assertEquals(resp.getErrorMessage(), null, resp.getErrorMessage()); + } + + /** Execute the query expecting it to fail with AuthorizationException */ + public void execFail(String format, Object... args) throws Exception { + String command = String.format(format, args); + CommandProcessorResponse resp = hcatDriver.run(command); + Assert.assertNotSame(resp.getErrorMessage(), 0, resp.getResponseCode()); + Assert.assertTrue((resp.getResponseCode() == 40000) || (resp.getResponseCode() == 403)); + if (resp.getErrorMessage() != null) { + Assert.assertTrue(resp.getErrorMessage().contains("org.apache.hadoop.security.AccessControlException")); + } + } + + + /** + * Tests whether the warehouse directory is writable by the current user (as defined by Hadoop) + */ + @Test + public void testWarehouseIsWritable() throws Exception { + Path top = new Path(whPath, "_foobarbaz12_"); + try { + whFs.mkdirs(top); + } finally { + whFs.delete(top, true); + } + } + + @Test + public void testShowDatabases() throws Exception { + exec("CREATE DATABASE doo"); + exec("SHOW DATABASES"); + + whFs.setPermission(whPath, perm300); //revoke r + execFail("SHOW DATABASES"); + } + + @Test + public void testDatabaseOps() throws Exception { + exec("SHOW TABLES"); + exec("SHOW TABLE EXTENDED 
LIKE foo1"); + + whFs.setPermission(whPath, perm700); + exec("CREATE DATABASE doo"); + exec("DESCRIBE DATABASE doo"); + exec("USE doo"); + exec("SHOW TABLES"); + exec("SHOW TABLE EXTENDED LIKE foo1"); + exec("DROP DATABASE doo"); + + //custom location + Path dbPath = new Path(whPath, new Random().nextInt() + "/mydb"); + whFs.mkdirs(dbPath, perm700); + exec("CREATE DATABASE doo2 LOCATION '%s'", dbPath.toUri()); + exec("DESCRIBE DATABASE doo2", dbPath.toUri()); + exec("USE doo2"); + exec("SHOW TABLES"); + exec("SHOW TABLE EXTENDED LIKE foo1"); + exec("DROP DATABASE doo2", dbPath.toUri()); + + //custom non-existing location + exec("CREATE DATABASE doo3 LOCATION '%s/subpath'", dbPath.toUri()); + } + + @Test + public void testCreateDatabaseFail1() throws Exception { + whFs.setPermission(whPath, perm500); + execFail("CREATE DATABASE doo"); //in the default location + + whFs.setPermission(whPath, perm555); + execFail("CREATE DATABASE doo2"); + } + + @Test + public void testCreateDatabaseFail2() throws Exception { + //custom location + Path dbPath = new Path(whPath, new Random().nextInt() + "/mydb"); + + whFs.mkdirs(dbPath, perm700); + whFs.setPermission(dbPath, perm500); + execFail("CREATE DATABASE doo2 LOCATION '%s'", dbPath.toUri()); + } + + @Test + public void testDropDatabaseFail1() throws Exception { + whFs.setPermission(whPath, perm700); + exec("CREATE DATABASE doo"); //in the default location + + whFs.setPermission(getDbPath("doo"), perm500); //revoke write + execFail("DROP DATABASE doo"); + } + + @Test + public void testDropDatabaseFail2() throws Exception { + //custom location + Path dbPath = new Path(whPath, new Random().nextInt() + "/mydb"); + + whFs.mkdirs(dbPath, perm700); + exec("CREATE DATABASE doo2 LOCATION '%s'", dbPath.toUri()); + + whFs.setPermission(dbPath, perm500); + execFail("DROP DATABASE doo2"); + } + + @Test + public void testDescSwitchDatabaseFail() throws Exception { + whFs.setPermission(whPath, perm700); + exec("CREATE DATABASE doo"); + 
whFs.setPermission(getDbPath("doo"), perm300); //revoke read + execFail("DESCRIBE DATABASE doo"); + execFail("USE doo"); + + //custom location + Path dbPath = new Path(whPath, new Random().nextInt() + "/mydb"); + whFs.mkdirs(dbPath, perm700); + exec("CREATE DATABASE doo2 LOCATION '%s'", dbPath.toUri()); + whFs.mkdirs(dbPath, perm300); //revoke read + execFail("DESCRIBE DATABASE doo2", dbPath.toUri()); + execFail("USE doo2"); + } + + @Test + public void testShowTablesFail() throws Exception { + whFs.setPermission(whPath, perm700); + exec("CREATE DATABASE doo"); + exec("USE doo"); + whFs.setPermission(getDbPath("doo"), perm300); //revoke read + execFail("SHOW TABLES"); + execFail("SHOW TABLE EXTENDED LIKE foo1"); + } + + @Test + public void testTableOps() throws Exception { + //default db + exec("CREATE TABLE foo1 (foo INT) STORED AS RCFILE"); + exec("DESCRIBE foo1"); + exec("DROP TABLE foo1"); + + //default db custom location + Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); + whFs.mkdirs(tablePath, perm700); + exec("CREATE EXTERNAL TABLE foo2 (foo INT) STORED AS RCFILE LOCATION '%s'", tablePath); + exec("DESCRIBE foo2"); + exec("DROP TABLE foo2"); + + //default db custom non existing location + exec("CREATE EXTERNAL TABLE foo3 (foo INT) STORED AS RCFILE LOCATION '%s/subpath'", tablePath); + exec("DESCRIBE foo3"); + exec("DROP TABLE foo3"); + + //non default db + exec("CREATE DATABASE doo"); + exec("USE doo"); + + exec("CREATE TABLE foo4 (foo INT) STORED AS RCFILE"); + exec("DESCRIBE foo4"); + exec("DROP TABLE foo4"); + + //non-default db custom location + tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); + whFs.mkdirs(tablePath, perm700); + exec("CREATE EXTERNAL TABLE foo5 (foo INT) STORED AS RCFILE LOCATION '%s'", tablePath); + exec("DESCRIBE foo5"); + exec("DROP TABLE foo5"); + + //non-default db custom non existing location + exec("CREATE EXTERNAL TABLE foo6 (foo INT) STORED AS RCFILE LOCATION '%s/subpath'", tablePath); + 
exec("DESCRIBE foo6"); + exec("DROP TABLE foo6"); + + exec("DROP TABLE IF EXISTS foo_non_exists"); + + exec("CREATE TABLE foo1 (foo INT) STORED AS RCFILE"); + exec("DESCRIBE EXTENDED foo1"); + exec("DESCRIBE FORMATTED foo1"); + exec("DESCRIBE foo1.foo"); + + //deep non-existing path for the table + tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); + whFs.mkdirs(tablePath, perm700); + exec("CREATE EXTERNAL TABLE foo2 (foo INT) STORED AS RCFILE LOCATION '%s/a/a/a/'", tablePath); + } + + @Test + public void testCreateTableFail1() throws Exception { + //default db + whFs.mkdirs(whPath, perm500); //revoke w + execFail("CREATE TABLE foo1 (foo INT) STORED AS RCFILE"); + } + + @Test + public void testCreateTableFail2() throws Exception { + //default db custom location + Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); + whFs.mkdirs(tablePath, perm500); + execFail("CREATE EXTERNAL TABLE foo2 (foo INT) STORED AS RCFILE LOCATION '%s'", tablePath); + + //default db custom non existing location + execFail("CREATE EXTERNAL TABLE foo3 (foo INT) STORED AS RCFILE LOCATION '%s/subpath'", tablePath); + } + + @Test + public void testCreateTableFail3() throws Exception { + //non default db + exec("CREATE DATABASE doo"); + whFs.setPermission(getDbPath("doo"), perm500); + + execFail("CREATE TABLE doo.foo4 (foo INT) STORED AS RCFILE"); + + //non-default db custom location, permission to write to tablePath, but not on db path + Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); + whFs.mkdirs(tablePath, perm700); + exec("USE doo"); + execFail("CREATE EXTERNAL TABLE foo5 (foo INT) STORED AS RCFILE LOCATION '%s'", tablePath); + } + + @Test + public void testCreateTableFail4() throws Exception { + //non default db + exec("CREATE DATABASE doo"); + + //non-default db custom location + Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); + whFs.mkdirs(tablePath, perm500); + execFail("CREATE EXTERNAL TABLE doo.foo5 
(foo INT) STORED AS RCFILE LOCATION '%s'", tablePath); + + //non-default db custom non existing location + execFail("CREATE EXTERNAL TABLE doo.foo6 (foo INT) STORED AS RCFILE LOCATION '%s/a/a/a/'", tablePath); + } + + @Test + public void testDropTableFail1() throws Exception { + //default db + exec("CREATE TABLE foo1 (foo INT) STORED AS RCFILE"); + whFs.mkdirs(getTablePath("default", "foo1"), perm500); //revoke w + execFail("DROP TABLE foo1"); + } + + @Test + public void testDropTableFail2() throws Exception { + //default db custom location + Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); + exec("CREATE EXTERNAL TABLE foo2 (foo INT) STORED AS RCFILE LOCATION '%s'", tablePath); + whFs.mkdirs(tablePath, perm500); + execFail("DROP TABLE foo2"); + } + + @Test + public void testDropTableFail4() throws Exception { + //non default db + exec("CREATE DATABASE doo"); + + //non-default db custom location + Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); + + exec("CREATE EXTERNAL TABLE doo.foo5 (foo INT) STORED AS RCFILE LOCATION '%s'", tablePath); + whFs.mkdirs(tablePath, perm500); + exec("USE doo"); //There is no DROP TABLE doo.foo5 support in Hive + execFail("DROP TABLE foo5"); + } + + @Test + public void testDescTableFail() throws Exception { + //default db + exec("CREATE TABLE foo1 (foo INT) STORED AS RCFILE"); + whFs.mkdirs(getTablePath("default", "foo1"), perm300); //revoke read + execFail("DESCRIBE foo1"); + + //default db custom location + Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); + whFs.mkdirs(tablePath, perm700); + exec("CREATE EXTERNAL TABLE foo2 (foo INT) STORED AS RCFILE LOCATION '%s'", tablePath); + whFs.mkdirs(tablePath, perm300); //revoke read + execFail("DESCRIBE foo2"); + } + + @Test + public void testAlterTableRename() throws Exception { + exec("CREATE TABLE foo1 (foo INT) STORED AS RCFILE"); + exec("ALTER TABLE foo1 RENAME TO foo2"); + + Path tablePath = new Path(whPath, new 
Random().nextInt() + "/mytable"); + exec("CREATE EXTERNAL TABLE foo3 (foo INT) STORED AS RCFILE LOCATION '%s'", tablePath); + exec("ALTER TABLE foo3 RENAME TO foo4"); + } + + @Test + public void testAlterTableRenameFail() throws Exception { + exec("CREATE TABLE foo1 (foo INT) STORED AS RCFILE"); + whFs.mkdirs(getTablePath("default", "foo1"), perm500); //revoke write + execFail("ALTER TABLE foo1 RENAME TO foo2"); + + Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); + exec("CREATE EXTERNAL TABLE foo3 (foo INT) STORED AS RCFILE LOCATION '%s'", tablePath); + whFs.mkdirs(tablePath, perm500); //revoke write + execFail("ALTER TABLE foo3 RENAME TO foo4"); + } + + @Test + public void testAlterTableRelocate() throws Exception { + exec("CREATE TABLE foo1 (foo INT) STORED AS RCFILE"); + Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); + exec("ALTER TABLE foo1 SET LOCATION '%s'", tablePath.makeQualified(whFs)); + + tablePath = new Path(whPath, new Random().nextInt() + "/mytable2"); + exec("CREATE EXTERNAL TABLE foo3 (foo INT) STORED AS RCFILE LOCATION '%s'", + tablePath.makeQualified(whFs)); + tablePath = new Path(whPath, new Random().nextInt() + "/mytable2"); + exec("ALTER TABLE foo3 SET LOCATION '%s'", tablePath.makeQualified(whFs)); + } + + @Test + public void testAlterTableRelocateFail() throws Exception { + exec("CREATE TABLE foo1 (foo INT) STORED AS RCFILE"); + Path tablePath = new Path(whPath, new Random().nextInt() + "/mytable"); + whFs.mkdirs(tablePath, perm500); //revoke write + execFail("ALTER TABLE foo1 SET LOCATION '%s'", tablePath.makeQualified(whFs)); + + //dont have access to new table loc + tablePath = new Path(whPath, new Random().nextInt() + "/mytable2"); + exec("CREATE EXTERNAL TABLE foo3 (foo INT) STORED AS RCFILE LOCATION '%s'", + tablePath.makeQualified(whFs)); + tablePath = new Path(whPath, new Random().nextInt() + "/mytable2"); + whFs.mkdirs(tablePath, perm500); //revoke write + execFail("ALTER TABLE foo3 
SET LOCATION '%s'", tablePath.makeQualified(whFs)); + + //have access to new table loc, but not old table loc + tablePath = new Path(whPath, new Random().nextInt() + "/mytable3"); + exec("CREATE EXTERNAL TABLE foo4 (foo INT) STORED AS RCFILE LOCATION '%s'", + tablePath.makeQualified(whFs)); + whFs.mkdirs(tablePath, perm500); //revoke write + tablePath = new Path(whPath, new Random().nextInt() + "/mytable3"); + execFail("ALTER TABLE foo4 SET LOCATION '%s'", tablePath.makeQualified(whFs)); + } + + @Test + public void testAlterTable() throws Exception { + exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (b STRING) STORED AS TEXTFILE"); + exec("ALTER TABLE foo1 SET TBLPROPERTIES ('foo'='bar')"); + exec("ALTER TABLE foo1 SET SERDEPROPERTIES ('foo'='bar')"); + exec("ALTER TABLE foo1 ADD COLUMNS (foo2 INT)"); + } + + @Test + public void testAddDropPartition() throws Exception { + exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (b STRING) STORED AS TEXTFILE"); + exec("ALTER TABLE foo1 ADD PARTITION (b='2010-10-10')"); + exec("ALTER TABLE foo1 ADD IF NOT EXISTS PARTITION (b='2010-10-10')"); + String relPath = new Random().nextInt() + "/mypart"; + exec("ALTER TABLE foo1 ADD PARTITION (b='2010-10-11') LOCATION '%s'", relPath); + + exec("ALTER TABLE foo1 PARTITION (b='2010-10-10') SET FILEFORMAT RCFILE"); + + exec("ALTER TABLE foo1 PARTITION (b='2010-10-10') SET FILEFORMAT INPUTFORMAT " + + "'org.apache.hadoop.hive.ql.io.RCFileInputFormat' OUTPUTFORMAT " + + "'org.apache.hadoop.hive.ql.io.RCFileOutputFormat' inputdriver " + + "'mydriver' outputdriver 'yourdriver'"); + + exec("ALTER TABLE foo1 DROP PARTITION (b='2010-10-10')"); + exec("ALTER TABLE foo1 DROP PARTITION (b='2010-10-11')"); + } + + @Test + public void testAddPartitionFail1() throws Exception { + exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (b STRING) STORED AS TEXTFILE"); + whFs.mkdirs(getTablePath("default", "foo1"), perm500); + execFail("ALTER TABLE foo1 ADD PARTITION (b='2010-10-10')"); + } + + @Test + 
public void testAddPartitionFail2() throws Exception { + exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (b STRING) STORED AS TEXTFILE"); + String relPath = new Random().nextInt() + "/mypart"; + Path partPath = new Path(getTablePath("default", "foo1"), relPath); + whFs.mkdirs(partPath, perm500); + exec("ALTER TABLE foo1 ADD PARTITION (b='2010-10-10') LOCATION '%s'", partPath); + } + + @Test + public void testDropPartitionFail1() throws Exception { + exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (b STRING) STORED AS TEXTFILE"); + exec("ALTER TABLE foo1 ADD PARTITION (b='2010-10-10')"); + whFs.mkdirs(getPartPath("b=2010-10-10", "default", "foo1"), perm500); + execFail("ALTER TABLE foo1 DROP PARTITION (b='2010-10-10')"); + } + + @Test + public void testDropPartitionFail2() throws Exception { + exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (b STRING) STORED AS TEXTFILE"); + String relPath = new Random().nextInt() + "/mypart"; + Path partPath = new Path(getTablePath("default", "foo1"), relPath); + whFs.mkdirs(partPath, perm700); + exec("ALTER TABLE foo1 ADD PARTITION (b='2010-10-10') LOCATION '%s'", partPath); + whFs.mkdirs(partPath, perm500); //revoke write + execFail("ALTER TABLE foo1 DROP PARTITION (b='2010-10-10')"); + } + + @Test + public void testAlterTableFail() throws Exception { + exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (boo STRING) STORED AS TEXTFILE"); + whFs.mkdirs(getTablePath("default", "foo1"), perm500); //revoke write + execFail("ALTER TABLE foo1 SET TBLPROPERTIES ('foo'='bar')"); + execFail("ALTER TABLE foo1 SET SERDEPROPERTIES ('foo'='bar')"); + execFail("ALTER TABLE foo1 ADD COLUMNS (foo2 INT)"); + } + + @Test + public void testShowTables() throws Exception { + exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (boo STRING) STORED AS TEXTFILE"); + exec("SHOW PARTITIONS foo1"); + + whFs.mkdirs(getTablePath("default", "foo1"), perm300); //revoke read + execFail("SHOW PARTITIONS foo1"); + } + + @Test + public void testAlterTablePartRename() 
throws Exception { + exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (b STRING) STORED AS RCFILE"); + Path loc = new Path(whPath, new Random().nextInt() + "/mypart"); + exec("ALTER TABLE foo1 ADD PARTITION (b='2010-10-16') LOCATION '%s'", loc); + exec("ALTER TABLE foo1 PARTITION (b='2010-10-16') RENAME TO PARTITION (b='2010-10-17')"); + } + + @Test + public void testAlterTablePartRenameFail() throws Exception { + exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (b STRING) STORED AS RCFILE"); + Path loc = new Path(whPath, new Random().nextInt() + "/mypart"); + exec("ALTER TABLE foo1 ADD PARTITION (b='2010-10-16') LOCATION '%s'", loc); + whFs.setPermission(loc, perm500); //revoke w + execFail("ALTER TABLE foo1 PARTITION (b='2010-10-16') RENAME TO PARTITION (b='2010-10-17')"); + } + + @Test + public void testAlterTablePartRelocate() throws Exception { + exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (b STRING) STORED AS RCFILE"); + exec("ALTER TABLE foo1 ADD PARTITION (b='2010-10-16')"); + Path partPath = new Path(whPath, new Random().nextInt() + "/mypart"); + exec("ALTER TABLE foo1 PARTITION (b='2010-10-16') SET LOCATION '%s'", partPath.makeQualified(whFs)); + } + + @Test + public void testAlterTablePartRelocateFail() throws Exception { + exec("CREATE TABLE foo1 (foo INT) PARTITIONED BY (b STRING) STORED AS RCFILE"); + + Path oldLoc = new Path(whPath, new Random().nextInt() + "/mypart"); + Path newLoc = new Path(whPath, new Random().nextInt() + "/mypart2"); + + exec("ALTER TABLE foo1 ADD PARTITION (b='2010-10-16') LOCATION '%s'", oldLoc); + whFs.mkdirs(oldLoc, perm500); + execFail("ALTER TABLE foo1 PARTITION (b='2010-10-16') SET LOCATION '%s'", newLoc.makeQualified(whFs)); + whFs.mkdirs(oldLoc, perm700); + whFs.mkdirs(newLoc, perm500); + execFail("ALTER TABLE foo1 PARTITION (b='2010-10-16') SET LOCATION '%s'", newLoc.makeQualified(whFs)); + } + +} diff --git hcatalog/hcatalog-pig-adapter/pom.xml hcatalog/hcatalog-pig-adapter/pom.xml index 10f4f98..09372c6 100644 
--- hcatalog/hcatalog-pig-adapter/pom.xml +++ hcatalog/hcatalog-pig-adapter/pom.xml @@ -22,14 +22,13 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"> - org.apache.hcatalog + org.apache.hive.hcatalog hcatalog 0.12.0-SNAPSHOT ../pom.xml 4.0.0 - org.apache.hcatalog hcatalog-pig-adapter jar hcatalog-pig-adapter @@ -37,7 +36,7 @@ - org.apache.hcatalog + org.apache.hive.hcatalog hcatalog-core ${hcatalog.version} compile diff --git hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/HCatBaseLoader.java hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/HCatBaseLoader.java deleted file mode 100644 index 3c3afa5..0000000 --- hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/HCatBaseLoader.java +++ /dev/null @@ -1,153 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.hcatalog.pig; - -import java.io.IOException; -import java.util.Arrays; -import java.util.List; -import java.util.Properties; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.RecordReader; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.apache.hcatalog.mapreduce.InputJobInfo; -import org.apache.hcatalog.mapreduce.PartInfo; -import org.apache.pig.LoadFunc; -import org.apache.pig.LoadMetadata; -import org.apache.pig.LoadPushDown; -import org.apache.pig.PigException; -import org.apache.pig.ResourceStatistics; -import org.apache.pig.backend.executionengine.ExecException; -import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigSplit; -import org.apache.pig.data.Tuple; -import org.apache.pig.impl.logicalLayer.FrontendException; -import org.apache.pig.impl.util.UDFContext; - -/** - * Base class for HCatLoader and HCatEximLoader - */ - -abstract class HCatBaseLoader extends LoadFunc implements LoadMetadata, LoadPushDown { - - protected static final String PRUNE_PROJECTION_INFO = "prune.projection.info"; - - private RecordReader reader; - protected String signature; - - HCatSchema outputSchema = null; - - - @Override - public Tuple getNext() throws IOException { - try { - HCatRecord hr = (HCatRecord) (reader.nextKeyValue() ? reader.getCurrentValue() : null); - Tuple t = PigHCatUtil.transformToTuple(hr, outputSchema); - // TODO : we were discussing an iter interface, and also a LazyTuple - // change this when plans for that solidifies. 
- return t; - } catch (ExecException e) { - int errCode = 6018; - String errMsg = "Error while reading input"; - throw new ExecException(errMsg, errCode, - PigException.REMOTE_ENVIRONMENT, e); - } catch (Exception eOther) { - int errCode = 6018; - String errMsg = "Error converting read value to tuple"; - throw new ExecException(errMsg, errCode, - PigException.REMOTE_ENVIRONMENT, eOther); - } - - } - - @Override - public void prepareToRead(RecordReader reader, PigSplit arg1) throws IOException { - this.reader = reader; - } - - @Override - public ResourceStatistics getStatistics(String location, Job job) throws IOException { - // statistics not implemented currently - return null; - } - - @Override - public List getFeatures() { - return Arrays.asList(LoadPushDown.OperatorSet.PROJECTION); - } - - @Override - public RequiredFieldResponse pushProjection(RequiredFieldList requiredFieldsInfo) throws FrontendException { - // Store the required fields information in the UDFContext so that we - // can retrieve it later. - storeInUDFContext(signature, PRUNE_PROJECTION_INFO, requiredFieldsInfo); - - // HCat will always prune columns based on what we ask of it - so the - // response is true - return new RequiredFieldResponse(true); - } - - @Override - public void setUDFContextSignature(String signature) { - this.signature = signature; - } - - - // helper methods - protected void storeInUDFContext(String signature, String key, Object value) { - UDFContext udfContext = UDFContext.getUDFContext(); - Properties props = udfContext.getUDFProperties( - this.getClass(), new String[]{signature}); - props.put(key, value); - } - - /** - * A utility method to get the size of inputs. This is accomplished by summing the - * size of all input paths on supported FileSystems. Locations whose size cannot be - * determined are ignored. Note non-FileSystem and unpartitioned locations will not - * report their input size by default. 
- */ - protected static long getSizeInBytes(InputJobInfo inputJobInfo) throws IOException { - Configuration conf = new Configuration(); - long sizeInBytes = 0; - - for (PartInfo partInfo : inputJobInfo.getPartitions()) { - try { - Path p = new Path(partInfo.getLocation()); - if (p.getFileSystem(conf).isFile(p)) { - sizeInBytes += p.getFileSystem(conf).getFileStatus(p).getLen(); - } else { - FileStatus[] fileStatuses = p.getFileSystem(conf).listStatus(p); - if (fileStatuses != null) { - for (FileStatus child : fileStatuses) { - sizeInBytes += child.getLen(); - } - } - } - } catch (IOException e) { - // Report size to the extent possible. - } - } - - return sizeInBytes; - } -} diff --git hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/HCatBaseStorer.java hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/HCatBaseStorer.java deleted file mode 100644 index ead54e3..0000000 --- hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/HCatBaseStorer.java +++ /dev/null @@ -1,470 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.pig; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; - -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.RecordWriter; -import org.apache.hcatalog.common.HCatException; -import org.apache.hcatalog.data.DefaultHCatRecord; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.hcatalog.data.schema.HCatFieldSchema; -import org.apache.hcatalog.data.schema.HCatFieldSchema.Type; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.apache.pig.ResourceSchema; -import org.apache.pig.ResourceStatistics; -import org.apache.pig.StoreFunc; -import org.apache.pig.StoreMetadata; -import org.apache.pig.backend.BackendException; -import org.apache.pig.data.DataBag; -import org.apache.pig.data.DataByteArray; -import org.apache.pig.data.DataType; -import org.apache.pig.data.Tuple; -import org.apache.pig.impl.logicalLayer.FrontendException; -import org.apache.pig.impl.logicalLayer.schema.Schema; -import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema; -import org.apache.pig.impl.util.ObjectSerializer; -import org.apache.pig.impl.util.UDFContext; -import org.apache.pig.impl.util.Utils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.google.common.collect.Lists; - -/** - * Base class for HCatStorer and HCatEximStorer - * - */ - -abstract class HCatBaseStorer extends StoreFunc implements StoreMetadata { - - private static final Logger LOG = LoggerFactory.getLogger( HCatBaseStorer.class ); - - private static final List SUPPORTED_INTEGER_CONVERSIONS = - Lists.newArrayList(Type.TINYINT, Type.SMALLINT, Type.INT); - protected static final String COMPUTED_OUTPUT_SCHEMA = "hcat.output.schema"; - protected final List partitionKeys; - protected final Map 
partitions; - protected Schema pigSchema; - private RecordWriter, HCatRecord> writer; - protected HCatSchema computedSchema; - protected static final String PIG_SCHEMA = "hcat.pig.store.schema"; - protected String sign; - - public HCatBaseStorer(String partSpecs, String schema) throws Exception { - - partitionKeys = new ArrayList(); - partitions = new HashMap(); - if (partSpecs != null && !partSpecs.trim().isEmpty()) { - String[] partKVPs = partSpecs.split(","); - for (String partKVP : partKVPs) { - String[] partKV = partKVP.split("="); - if (partKV.length == 2) { - String partKey = partKV[0].trim(); - partitionKeys.add(partKey); - partitions.put(partKey, partKV[1].trim()); - } else { - throw new FrontendException("Invalid partition column specification. " + partSpecs, PigHCatUtil.PIG_EXCEPTION_CODE); - } - } - } - - if (schema != null) { - pigSchema = Utils.getSchemaFromString(schema); - } - - } - - @Override - public void checkSchema(ResourceSchema resourceSchema) throws IOException { - - /* Schema provided by user and the schema computed by Pig - * at the time of calling store must match. - */ - Schema runtimeSchema = Schema.getPigSchema(resourceSchema); - if (pigSchema != null) { - if (!Schema.equals(runtimeSchema, pigSchema, false, true)) { - throw new FrontendException("Schema provided in store statement doesn't match with the Schema" + - "returned by Pig run-time. Schema provided in HCatStorer: " + pigSchema.toString() + " Schema received from Pig runtime: " + runtimeSchema.toString(), PigHCatUtil.PIG_EXCEPTION_CODE); - } - } else { - pigSchema = runtimeSchema; - } - UDFContext.getUDFContext().getUDFProperties(this.getClass(), new String[]{sign}).setProperty(PIG_SCHEMA, ObjectSerializer.serialize(pigSchema)); - } - - /** Constructs HCatSchema from pigSchema. Passed tableSchema is the existing - * schema of the table in metastore. 
- */ - protected HCatSchema convertPigSchemaToHCatSchema(Schema pigSchema, HCatSchema tableSchema) throws FrontendException { - List fieldSchemas = new ArrayList(pigSchema.size()); - for (FieldSchema fSchema : pigSchema.getFields()) { - try { - HCatFieldSchema hcatFieldSchema = getColFromSchema(fSchema.alias, tableSchema); - - fieldSchemas.add(getHCatFSFromPigFS(fSchema, hcatFieldSchema)); - } catch (HCatException he) { - throw new FrontendException(he.getMessage(), PigHCatUtil.PIG_EXCEPTION_CODE, he); - } - } - return new HCatSchema(fieldSchemas); - } - - public static boolean removeTupleFromBag(HCatFieldSchema hcatFieldSchema, FieldSchema bagFieldSchema) throws HCatException { - if (hcatFieldSchema != null && hcatFieldSchema.getArrayElementSchema().get(0).getType() != Type.STRUCT) { - return true; - } - // Column was not found in table schema. Its a new column - List tupSchema = bagFieldSchema.schema.getFields(); - if (hcatFieldSchema == null && tupSchema.size() == 1 && (tupSchema.get(0).schema == null || (tupSchema.get(0).type == DataType.TUPLE && tupSchema.get(0).schema.size() == 1))) { - return true; - } - return false; - } - - - private HCatFieldSchema getHCatFSFromPigFS(FieldSchema fSchema, HCatFieldSchema hcatFieldSchema) throws FrontendException, HCatException { - byte type = fSchema.type; - switch (type) { - - case DataType.CHARARRAY: - case DataType.BIGCHARARRAY: - return new HCatFieldSchema(fSchema.alias, Type.STRING, null); - - case DataType.INTEGER: - if (hcatFieldSchema != null) { - if (!SUPPORTED_INTEGER_CONVERSIONS.contains(hcatFieldSchema.getType())) { - throw new FrontendException("Unsupported type: " + type + " in Pig's schema", - PigHCatUtil.PIG_EXCEPTION_CODE); - } - return new HCatFieldSchema(fSchema.alias, hcatFieldSchema.getType(), null); - } else { - return new HCatFieldSchema(fSchema.alias, Type.INT, null); - } - - case DataType.LONG: - return new HCatFieldSchema(fSchema.alias, Type.BIGINT, null); - - case DataType.FLOAT: - return new 
HCatFieldSchema(fSchema.alias, Type.FLOAT, null); - - case DataType.DOUBLE: - return new HCatFieldSchema(fSchema.alias, Type.DOUBLE, null); - - case DataType.BYTEARRAY: - return new HCatFieldSchema(fSchema.alias, Type.BINARY, null); - - case DataType.BOOLEAN: - return new HCatFieldSchema(fSchema.alias, Type.BOOLEAN, null); - - case DataType.BAG: - Schema bagSchema = fSchema.schema; - List arrFields = new ArrayList(1); - FieldSchema field; - // Find out if we need to throw away the tuple or not. - if (removeTupleFromBag(hcatFieldSchema, fSchema)) { - field = bagSchema.getField(0).schema.getField(0); - } else { - field = bagSchema.getField(0); - } - arrFields.add(getHCatFSFromPigFS(field, hcatFieldSchema == null ? null : hcatFieldSchema.getArrayElementSchema().get(0))); - return new HCatFieldSchema(fSchema.alias, Type.ARRAY, new HCatSchema(arrFields), ""); - - case DataType.TUPLE: - List fieldNames = new ArrayList(); - List hcatFSs = new ArrayList(); - HCatSchema structSubSchema = hcatFieldSchema == null ? null : hcatFieldSchema.getStructSubSchema(); - List fields = fSchema.schema.getFields(); - for (int i = 0; i < fields.size(); i++) { - FieldSchema fieldSchema = fields.get(i); - fieldNames.add(fieldSchema.alias); - hcatFSs.add(getHCatFSFromPigFS(fieldSchema, structSubSchema == null ? null : structSubSchema.get(i))); - } - return new HCatFieldSchema(fSchema.alias, Type.STRUCT, new HCatSchema(hcatFSs), ""); - - case DataType.MAP: { - // Pig's schema contain no type information about map's keys and - // values. So, if its a new column assume if its existing - // return whatever is contained in the existing column. - - HCatFieldSchema valFS; - List valFSList = new ArrayList(1); - - if (hcatFieldSchema != null) { - return new HCatFieldSchema(fSchema.alias, Type.MAP, Type.STRING, hcatFieldSchema.getMapValueSchema(), ""); - } - - // Column not found in target table. Its a new column. 
Its schema is map - valFS = new HCatFieldSchema(fSchema.alias, Type.STRING, ""); - valFSList.add(valFS); - return new HCatFieldSchema(fSchema.alias, Type.MAP, Type.STRING, new HCatSchema(valFSList), ""); - } - - default: - throw new FrontendException("Unsupported type: " + type + " in Pig's schema", PigHCatUtil.PIG_EXCEPTION_CODE); - } - } - - @Override - public void prepareToWrite(RecordWriter writer) throws IOException { - this.writer = writer; - computedSchema = (HCatSchema) ObjectSerializer.deserialize(UDFContext.getUDFContext().getUDFProperties(this.getClass(), new String[]{sign}).getProperty(COMPUTED_OUTPUT_SCHEMA)); - } - - @Override - public void putNext(Tuple tuple) throws IOException { - - List outgoing = new ArrayList(tuple.size()); - - int i = 0; - for (HCatFieldSchema fSchema : computedSchema.getFields()) { - outgoing.add(getJavaObj(tuple.get(i++), fSchema)); - } - try { - writer.write(null, new DefaultHCatRecord(outgoing)); - } catch (InterruptedException e) { - throw new BackendException("Error while writing tuple: " + tuple, PigHCatUtil.PIG_EXCEPTION_CODE, e); - } - } - - private Object getJavaObj(Object pigObj, HCatFieldSchema hcatFS) throws HCatException, BackendException { - try { - - // The real work-horse. Spend time and energy in this method if there is - // need to keep HCatStorer lean and go fast. - Type type = hcatFS.getType(); - switch (type) { - - case BINARY: - if (pigObj == null) { - return null; - } - return ((DataByteArray) pigObj).get(); - - case STRUCT: - if (pigObj == null) { - return null; - } - HCatSchema structSubSchema = hcatFS.getStructSubSchema(); - // Unwrap the tuple. - List all = ((Tuple) pigObj).getAll(); - ArrayList converted = new ArrayList(all.size()); - for (int i = 0; i < all.size(); i++) { - converted.add(getJavaObj(all.get(i), structSubSchema.get(i))); - } - return converted; - - case ARRAY: - if (pigObj == null) { - return null; - } - // Unwrap the bag. 
- DataBag pigBag = (DataBag) pigObj; - HCatFieldSchema tupFS = hcatFS.getArrayElementSchema().get(0); - boolean needTuple = tupFS.getType() == Type.STRUCT; - List bagContents = new ArrayList((int) pigBag.size()); - Iterator bagItr = pigBag.iterator(); - - while (bagItr.hasNext()) { - // If there is only one element in tuple contained in bag, we throw away the tuple. - bagContents.add(getJavaObj(needTuple ? bagItr.next() : bagItr.next().get(0), tupFS)); - - } - return bagContents; - case MAP: - if (pigObj == null) { - return null; - } - Map pigMap = (Map) pigObj; - Map typeMap = new HashMap(); - for (Entry entry : pigMap.entrySet()) { - // the value has a schema and not a FieldSchema - typeMap.put( - // Schema validation enforces that the Key is a String - (String) entry.getKey(), - getJavaObj(entry.getValue(), hcatFS.getMapValueSchema().get(0))); - } - return typeMap; - case STRING: - case INT: - case BIGINT: - case FLOAT: - case DOUBLE: - return pigObj; - case SMALLINT: - if (pigObj == null) { - return null; - } - if ((Integer) pigObj < Short.MIN_VALUE || (Integer) pigObj > Short.MAX_VALUE) { - throw new BackendException("Value " + pigObj + " is outside the bounds of column " + - hcatFS.getName() + " with type " + hcatFS.getType(), PigHCatUtil.PIG_EXCEPTION_CODE); - } - return ((Integer) pigObj).shortValue(); - case TINYINT: - if (pigObj == null) { - return null; - } - if ((Integer) pigObj < Byte.MIN_VALUE || (Integer) pigObj > Byte.MAX_VALUE) { - throw new BackendException("Value " + pigObj + " is outside the bounds of column " + - hcatFS.getName() + " with type " + hcatFS.getType(), PigHCatUtil.PIG_EXCEPTION_CODE); - } - return ((Integer) pigObj).byteValue(); - case BOOLEAN: - if (pigObj == null) { - LOG.debug( "HCatBaseStorer.getJavaObj(BOOLEAN): obj null, bailing early" ); - return null; - } - - if( pigObj instanceof String ) { - if( ((String)pigObj).trim().compareTo("0") == 0 ) { - return Boolean.FALSE; - } - if( ((String)pigObj).trim().compareTo("1") == 0 ) 
{ - return Boolean.TRUE; - } - - throw new BackendException( - "Unexpected type " + type + " for value " + pigObj - + (pigObj == null ? "" : " of class " - + pigObj.getClass().getName()), PigHCatUtil.PIG_EXCEPTION_CODE); - } - - return Boolean.parseBoolean( pigObj.toString() ); - default: - throw new BackendException("Unexpected type " + type + " for value " + pigObj - + (pigObj == null ? "" : " of class " - + pigObj.getClass().getName()), PigHCatUtil.PIG_EXCEPTION_CODE); - } - } catch (BackendException e) { - // provide the path to the field in the error message - throw new BackendException( - (hcatFS.getName() == null ? " " : hcatFS.getName() + ".") + e.getMessage(), - e.getCause() == null ? e : e.getCause()); - } - } - - @Override - public String relToAbsPathForStoreLocation(String location, Path curDir) throws IOException { - - // Need to necessarily override this method since default impl assumes HDFS - // based location string. - return location; - } - - @Override - public void setStoreFuncUDFContextSignature(String signature) { - sign = signature; - } - - - protected void doSchemaValidations(Schema pigSchema, HCatSchema tblSchema) throws FrontendException, HCatException { - - // Iterate through all the elements in Pig Schema and do validations as - // dictated by semantics, consult HCatSchema of table when need be. 
- - for (FieldSchema pigField : pigSchema.getFields()) { - HCatFieldSchema hcatField = getColFromSchema(pigField.alias, tblSchema); - validateSchema(pigField, hcatField); - } - - try { - PigHCatUtil.validateHCatTableSchemaFollowsPigRules(tblSchema); - } catch (IOException e) { - throw new FrontendException("HCatalog schema is not compatible with Pig: " + e.getMessage(), PigHCatUtil.PIG_EXCEPTION_CODE, e); - } - } - - - private void validateSchema(FieldSchema pigField, HCatFieldSchema hcatField) - throws HCatException, FrontendException { - validateAlias(pigField.alias); - byte type = pigField.type; - if (DataType.isComplex(type)) { - switch (type) { - - case DataType.MAP: - if (hcatField != null) { - if (hcatField.getMapKeyType() != Type.STRING) { - throw new FrontendException("Key Type of map must be String " + hcatField, PigHCatUtil.PIG_EXCEPTION_CODE); - } - // Map values can be primitive or complex - } - break; - - case DataType.BAG: - HCatSchema arrayElementSchema = hcatField == null ? null : hcatField.getArrayElementSchema(); - for (FieldSchema innerField : pigField.schema.getField(0).schema.getFields()) { - validateSchema(innerField, getColFromSchema(pigField.alias, arrayElementSchema)); - } - break; - - case DataType.TUPLE: - HCatSchema structSubSchema = hcatField == null ? null : hcatField.getStructSubSchema(); - for (FieldSchema innerField : pigField.schema.getFields()) { - validateSchema(innerField, getColFromSchema(pigField.alias, structSubSchema)); - } - break; - - default: - throw new FrontendException("Internal Error.", PigHCatUtil.PIG_EXCEPTION_CODE); - } - } - } - - private void validateAlias(String alias) throws FrontendException { - if (alias == null) { - throw new FrontendException("Column name for a field is not specified. Please provide the full schema as an argument to HCatStorer.", PigHCatUtil.PIG_EXCEPTION_CODE); - } - if (alias.matches(".*[A-Z]+.*")) { - throw new FrontendException("Column names should all be in lowercase. 
Invalid name found: " + alias, PigHCatUtil.PIG_EXCEPTION_CODE); - } - } - - // Finds column by name in HCatSchema, if not found returns null. - private HCatFieldSchema getColFromSchema(String alias, HCatSchema tblSchema) { - if (tblSchema != null) { - for (HCatFieldSchema hcatField : tblSchema.getFields()) { - if (hcatField != null && hcatField.getName() != null && hcatField.getName().equalsIgnoreCase(alias)) { - return hcatField; - } - } - } - // Its a new column - return null; - } - - @Override - public void cleanupOnFailure(String location, Job job) throws IOException { - // No-op. - } - - @Override - public void storeStatistics(ResourceStatistics stats, String arg1, Job job) throws IOException { - } -} diff --git hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/HCatEximLoader.java.broken hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/HCatEximLoader.java.broken deleted file mode 100644 index 33824b8..0000000 --- hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/HCatEximLoader.java.broken +++ /dev/null @@ -1,129 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.pig; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Properties; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hcatalog.data.schema.HCatFieldSchema; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.apache.hcatalog.mapreduce.HCatBaseInputFormat; -import org.apache.hcatalog.mapreduce.HCatEximInputFormat; -import org.apache.hadoop.mapreduce.InputFormat; -import org.apache.hadoop.mapreduce.Job; -import org.apache.pig.Expression; -import org.apache.pig.LoadFunc; -import org.apache.pig.ResourceSchema; -import org.apache.pig.impl.util.UDFContext; - -/** - * Pig {@link LoadFunc} to read data/metadata from hcatalog exported location - */ - -public class HCatEximLoader extends HCatBaseLoader { - - private static final Log LOG = LogFactory.getLog(HCatEximLoader.class); - - private HCatSchema tableSchema; - private HCatSchema partitionSchema; - private HCatEximInputFormat inputFormat; - - public HCatEximLoader() { - LOG.debug("HCatEximLoader ctored"); - } - - @Override - public ResourceSchema getSchema(String location, Job job) throws IOException { - LOG.debug("getSchema with location :" + location); - if (tableSchema == null) { - List rv = HCatEximInputFormat.setInput(job, location, null); - tableSchema = rv.get(0); - partitionSchema = rv.get(1); - } - LOG.debug("getSchema got schema :" + tableSchema.toString()); - List colsPlusPartKeys = new ArrayList(); - colsPlusPartKeys.addAll(tableSchema.getFields()); - colsPlusPartKeys.addAll(partitionSchema.getFields()); - outputSchema = new HCatSchema(colsPlusPartKeys); - return PigHCatUtil.getResourceSchema(outputSchema); - } - - @Override - public String[] getPartitionKeys(String location, Job job) throws IOException { - LOG.warn("getPartitionKeys with location :" + location); - /* - if (tableSchema == null) { - List rv = HCatEximInputFormat.setInput(job, location, 
null); - tableSchema = rv.get(0); - partitionSchema = rv.get(1); - } - return partitionSchema.getFieldNames().toArray(new String[0]); - */ - return null; - } - - @Override - public void setPartitionFilter(Expression partitionFilter) throws IOException { - LOG.debug("setPartitionFilter with filter :" + partitionFilter.toString()); - } - - @Override - public void setLocation(String location, Job job) throws IOException { - LOG.debug("setLocation with location :" + location); - List rv = HCatEximInputFormat.setInput(job, location, null); - tableSchema = rv.get(0); - partitionSchema = rv.get(1); - List colsPlusPartKeys = new ArrayList(); - colsPlusPartKeys.addAll(tableSchema.getFields()); - colsPlusPartKeys.addAll(partitionSchema.getFields()); - outputSchema = new HCatSchema(colsPlusPartKeys); - UDFContext udfContext = UDFContext.getUDFContext(); - Properties props = udfContext.getUDFProperties(this.getClass(), - new String[] {signature}); - RequiredFieldList requiredFieldsInfo = - (RequiredFieldList) props.get(PRUNE_PROJECTION_INFO); - if (requiredFieldsInfo != null) { - ArrayList fcols = new ArrayList(); - for (RequiredField rf : requiredFieldsInfo.getFields()) { - fcols.add(tableSchema.getFields().get(rf.getIndex())); - } - outputSchema = new HCatSchema(fcols); - try { - HCatBaseInputFormat.setOutputSchema(job, outputSchema); - } catch (Exception e) { - throw new IOException(e); - } - } - } - - - @Override - public InputFormat getInputFormat() throws IOException { - if (inputFormat == null) { - inputFormat = new HCatEximInputFormat(); - } - return inputFormat; - } - -} diff --git hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/HCatEximStorer.java.broken hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/HCatEximStorer.java.broken deleted file mode 100644 index 3e21cc7..0000000 --- hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/HCatEximStorer.java.broken +++ /dev/null @@ -1,152 +0,0 @@ -/** - * Licensed to the 
Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.pig; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Properties; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.common.HCatException; -import org.apache.hcatalog.common.HCatUtil; -import org.apache.hcatalog.data.schema.HCatFieldSchema; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.apache.hcatalog.mapreduce.HCatEximOutputCommitter; -import org.apache.hcatalog.mapreduce.HCatEximOutputFormat; -import org.apache.hadoop.hive.metastore.MetaStoreUtils; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.OutputFormat; -import org.apache.pig.ResourceSchema; -import org.apache.pig.impl.logicalLayer.FrontendException; -import org.apache.pig.impl.logicalLayer.schema.Schema; -import org.apache.pig.impl.util.ObjectSerializer; -import org.apache.pig.impl.util.UDFContext; - -/** - * HCatEximStorer. 
- * - */ - -public class HCatEximStorer extends HCatBaseStorer { - - private static final Log LOG = LogFactory.getLog(HCatEximStorer.class); - - private final String outputLocation; - - public HCatEximStorer(String outputLocation) throws Exception { - this(outputLocation, null, null); - } - - public HCatEximStorer(String outputLocation, String partitionSpec) throws Exception { - this(outputLocation, partitionSpec, null); - } - - public HCatEximStorer(String outputLocation, String partitionSpec, String schema) - throws Exception { - super(partitionSpec, schema); - this.outputLocation = outputLocation; - LOG.debug("HCatEximStorer called"); - } - - @Override - public OutputFormat getOutputFormat() throws IOException { - LOG.debug("getOutputFormat called"); - return new HCatEximOutputFormat(); - } - - @Override - public void setStoreLocation(String location, Job job) throws IOException { - LOG.debug("setStoreLocation called with :" + location); - String[] userStr = location.split("\\."); - String dbname = MetaStoreUtils.DEFAULT_DATABASE_NAME; - String tablename = null; - if (userStr.length == 2) { - dbname = userStr[0]; - tablename = userStr[1]; - } else { - tablename = userStr[0]; - } - Properties p = UDFContext.getUDFContext() - .getUDFProperties(this.getClass(), new String[] {sign}); - Configuration config = job.getConfiguration(); - if (!HCatUtil.checkJobContextIfRunningFromBackend(job)) { - Schema schema = (Schema) ObjectSerializer.deserialize(p.getProperty(PIG_SCHEMA)); - if (schema != null) { - pigSchema = schema; - } - if (pigSchema == null) { - throw new FrontendException("Schema for data cannot be determined.", - PigHCatUtil.PIG_EXCEPTION_CODE); - } - HCatSchema hcatTblSchema = new HCatSchema(new ArrayList()); - try { - doSchemaValidations(pigSchema, hcatTblSchema); - } catch (HCatException he) { - throw new FrontendException(he.getMessage(), PigHCatUtil.PIG_EXCEPTION_CODE, he); - } - - List hcatFields = new ArrayList(); - List partVals = new ArrayList(); - 
for (String key : partitionKeys) { - hcatFields.add(new HCatFieldSchema(key, HCatFieldSchema.Type.STRING, "")); - partVals.add(partitions.get(key)); - } - - HCatSchema outputSchema = convertPigSchemaToHCatSchema(pigSchema, - hcatTblSchema); - LOG.debug("Pig Schema '" + pigSchema.toString() + "' was converted to HCatSchema '" - + outputSchema); - HCatEximOutputFormat.setOutput(job, - dbname, tablename, - outputLocation, - new HCatSchema(hcatFields), - partVals, - outputSchema); - p.setProperty(COMPUTED_OUTPUT_SCHEMA, ObjectSerializer.serialize(outputSchema)); - p.setProperty(HCatConstants.HCAT_KEY_OUTPUT_INFO, - config.get(HCatConstants.HCAT_KEY_OUTPUT_INFO)); - if (config.get(HCatConstants.HCAT_KEY_HIVE_CONF) != null) { - p.setProperty(HCatConstants.HCAT_KEY_HIVE_CONF, - config.get(HCatConstants.HCAT_KEY_HIVE_CONF)); - } - } else { - config.set(HCatConstants.HCAT_KEY_OUTPUT_INFO, - p.getProperty(HCatConstants.HCAT_KEY_OUTPUT_INFO)); - if (p.getProperty(HCatConstants.HCAT_KEY_HIVE_CONF) != null) { - config.set(HCatConstants.HCAT_KEY_HIVE_CONF, - p.getProperty(HCatConstants.HCAT_KEY_HIVE_CONF)); - } - } - } - - @Override - public void storeSchema(ResourceSchema schema, String arg1, Job job) throws IOException { - if( job.getConfiguration().get("mapred.job.tracker", "").equalsIgnoreCase("local") ) { - //In local mode, mapreduce will not call OutputCommitter.cleanupJob. - //Calling it from here so that the partition publish happens. - //This call needs to be removed after MAPREDUCE-1447 is fixed. 
- new HCatEximOutputCommitter(job,null).cleanupJob(job); - } - } -} diff --git hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/HCatLoader.java hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/HCatLoader.java deleted file mode 100644 index a645ba1..0000000 --- hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/HCatLoader.java +++ /dev/null @@ -1,273 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.hcatalog.pig; - -import java.io.IOException; -import java.util.Enumeration; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Properties; - -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.ql.metadata.Table; -import org.apache.hadoop.mapreduce.InputFormat; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.security.Credentials; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.common.HCatContext; -import org.apache.hcatalog.common.HCatUtil; -import org.apache.hcatalog.data.Pair; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.apache.hcatalog.mapreduce.HCatInputFormat; -import org.apache.hcatalog.mapreduce.InputJobInfo; -import org.apache.pig.Expression; -import org.apache.pig.Expression.BinaryExpression; -import org.apache.pig.PigException; -import org.apache.pig.ResourceSchema; -import org.apache.pig.ResourceStatistics; -import org.apache.pig.impl.util.UDFContext; - -/** - * Pig {@link org.apache.pig.LoadFunc} to read data from HCat - */ - -public class HCatLoader extends HCatBaseLoader { - - private static final String PARTITION_FILTER = "partition.filter"; // for future use - - private HCatInputFormat hcatInputFormat = null; - private String dbName; - private String tableName; - private String hcatServerUri; - private String partitionFilterString; - private final PigHCatUtil phutil = new PigHCatUtil(); - - // Signature for wrapped loader, see comments in LoadFuncBasedInputDriver.initialize - final public static String INNER_SIGNATURE = "hcatloader.inner.signature"; - final public static String INNER_SIGNATURE_PREFIX = "hcatloader_inner_signature"; - // A hash map which stores job credentials. The key is a signature passed by Pig, which is - //unique to the load func and input file name (table, in our case). 
- private static Map jobCredentials = new HashMap(); - - @Override - public InputFormat getInputFormat() throws IOException { - if (hcatInputFormat == null) { - hcatInputFormat = new HCatInputFormat(); - } - return hcatInputFormat; - } - - @Override - public String relativeToAbsolutePath(String location, Path curDir) throws IOException { - return location; - } - - @Override - public void setLocation(String location, Job job) throws IOException { - HCatContext.INSTANCE.setConf(job.getConfiguration()).getConf().get() - .setBoolean(HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION, true); - - UDFContext udfContext = UDFContext.getUDFContext(); - Properties udfProps = udfContext.getUDFProperties(this.getClass(), - new String[]{signature}); - job.getConfiguration().set(INNER_SIGNATURE, INNER_SIGNATURE_PREFIX + "_" + signature); - Pair dbTablePair = PigHCatUtil.getDBTableNames(location); - dbName = dbTablePair.first; - tableName = dbTablePair.second; - - RequiredFieldList requiredFieldsInfo = (RequiredFieldList) udfProps - .get(PRUNE_PROJECTION_INFO); - // get partitionFilterString stored in the UDFContext - it would have - // been stored there by an earlier call to setPartitionFilter - // call setInput on HCatInputFormat only in the frontend because internally - // it makes calls to the hcat server - we don't want these to happen in - // the backend - // in the hadoop front end mapred.task.id property will not be set in - // the Configuration - if (udfProps.containsKey(HCatConstants.HCAT_PIG_LOADER_LOCATION_SET)) { - for (Enumeration emr = udfProps.keys(); emr.hasMoreElements(); ) { - PigHCatUtil.getConfigFromUDFProperties(udfProps, - job.getConfiguration(), emr.nextElement().toString()); - } - if (!HCatUtil.checkJobContextIfRunningFromBackend(job)) { - //Combine credentials and credentials from job takes precedence for freshness - Credentials crd = jobCredentials.get(INNER_SIGNATURE_PREFIX + "_" + signature); - crd.addAll(job.getCredentials()); - 
job.getCredentials().addAll(crd); - } - } else { - Job clone = new Job(job.getConfiguration()); - HCatInputFormat.setInput(job, dbName, tableName).setFilter(getPartitionFilterString()); - - // We will store all the new /changed properties in the job in the - // udf context, so the the HCatInputFormat.setInput method need not - //be called many times. - for (Entry keyValue : job.getConfiguration()) { - String oldValue = clone.getConfiguration().getRaw(keyValue.getKey()); - if ((oldValue == null) || (keyValue.getValue().equals(oldValue) == false)) { - udfProps.put(keyValue.getKey(), keyValue.getValue()); - } - } - udfProps.put(HCatConstants.HCAT_PIG_LOADER_LOCATION_SET, true); - - //Store credentials in a private hash map and not the udf context to - // make sure they are not public. - Credentials crd = new Credentials(); - crd.addAll(job.getCredentials()); - jobCredentials.put(INNER_SIGNATURE_PREFIX + "_" + signature, crd); - } - - // Need to also push projections by calling setOutputSchema on - // HCatInputFormat - we have to get the RequiredFields information - // from the UdfContext, translate it to an Schema and then pass it - // The reason we do this here is because setLocation() is called by - // Pig runtime at InputFormat.getSplits() and - // InputFormat.createRecordReader() time - we are not sure when - // HCatInputFormat needs to know about pruned projections - so doing it - // here will ensure we communicate to HCatInputFormat about pruned - // projections at getSplits() and createRecordReader() time - - if (requiredFieldsInfo != null) { - // convert to hcatschema and pass to HCatInputFormat - try { - outputSchema = phutil.getHCatSchema(requiredFieldsInfo.getFields(), signature, this.getClass()); - HCatInputFormat.setOutputSchema(job, outputSchema); - } catch (Exception e) { - throw new IOException(e); - } - } else { - // else - this means pig's optimizer never invoked the pushProjection - // method - so we need all fields and hence we should not call the 
- // setOutputSchema on HCatInputFormat - if (HCatUtil.checkJobContextIfRunningFromBackend(job)) { - try { - HCatSchema hcatTableSchema = (HCatSchema) udfProps.get(HCatConstants.HCAT_TABLE_SCHEMA); - outputSchema = hcatTableSchema; - HCatInputFormat.setOutputSchema(job, outputSchema); - } catch (Exception e) { - throw new IOException(e); - } - } - } - - } - - @Override - public String[] getPartitionKeys(String location, Job job) - throws IOException { - Table table = phutil.getTable(location, - hcatServerUri != null ? hcatServerUri : PigHCatUtil.getHCatServerUri(job), - PigHCatUtil.getHCatServerPrincipal(job)); - List tablePartitionKeys = table.getPartitionKeys(); - String[] partitionKeys = new String[tablePartitionKeys.size()]; - for (int i = 0; i < tablePartitionKeys.size(); i++) { - partitionKeys[i] = tablePartitionKeys.get(i).getName(); - } - return partitionKeys; - } - - @Override - public ResourceSchema getSchema(String location, Job job) throws IOException { - HCatContext.INSTANCE.setConf(job.getConfiguration()).getConf().get() - .setBoolean(HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION, true); - - Table table = phutil.getTable(location, - hcatServerUri != null ? 
hcatServerUri : PigHCatUtil.getHCatServerUri(job), - PigHCatUtil.getHCatServerPrincipal(job)); - HCatSchema hcatTableSchema = HCatUtil.getTableSchemaWithPtnCols(table); - try { - PigHCatUtil.validateHCatTableSchemaFollowsPigRules(hcatTableSchema); - } catch (IOException e) { - throw new PigException( - "Table schema incompatible for reading through HCatLoader :" + e.getMessage() - + ";[Table schema was " + hcatTableSchema.toString() + "]" - , PigHCatUtil.PIG_EXCEPTION_CODE, e); - } - storeInUDFContext(signature, HCatConstants.HCAT_TABLE_SCHEMA, hcatTableSchema); - outputSchema = hcatTableSchema; - return PigHCatUtil.getResourceSchema(hcatTableSchema); - } - - @Override - public void setPartitionFilter(Expression partitionFilter) throws IOException { - // convert the partition filter expression into a string expected by - // hcat and pass it in setLocation() - - partitionFilterString = getHCatComparisonString(partitionFilter); - - // store this in the udf context so we can get it later - storeInUDFContext(signature, - PARTITION_FILTER, partitionFilterString); - } - - /** - * Get statistics about the data to be loaded. Only input data size is implemented at this time. 
- */ - @Override - public ResourceStatistics getStatistics(String location, Job job) throws IOException { - try { - ResourceStatistics stats = new ResourceStatistics(); - InputJobInfo inputJobInfo = (InputJobInfo) HCatUtil.deserialize( - job.getConfiguration().get(HCatConstants.HCAT_KEY_JOB_INFO)); - stats.setmBytes(getSizeInBytes(inputJobInfo) / 1024 / 1024); - return stats; - } catch (Exception e) { - throw new IOException(e); - } - } - - private String getPartitionFilterString() { - if (partitionFilterString == null) { - Properties props = UDFContext.getUDFContext().getUDFProperties( - this.getClass(), new String[]{signature}); - partitionFilterString = props.getProperty(PARTITION_FILTER); - } - return partitionFilterString; - } - - private String getHCatComparisonString(Expression expr) { - if (expr instanceof BinaryExpression) { - // call getHCatComparisonString on lhs and rhs, and and join the - // results with OpType string - - // we can just use OpType.toString() on all Expression types except - // Equal, NotEqualt since Equal has '==' in toString() and - // we need '=' - String opStr = null; - switch (expr.getOpType()) { - case OP_EQ: - opStr = " = "; - break; - default: - opStr = expr.getOpType().toString(); - } - BinaryExpression be = (BinaryExpression) expr; - return "(" + getHCatComparisonString(be.getLhs()) + - opStr + - getHCatComparisonString(be.getRhs()) + ")"; - } else { - // should be a constant or column - return expr.toString(); - } - } - -} diff --git hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/HCatStorer.java hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/HCatStorer.java deleted file mode 100644 index 9445ae9..0000000 --- hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/HCatStorer.java +++ /dev/null @@ -1,167 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.pig; - -import java.io.IOException; -import java.util.Enumeration; -import java.util.HashMap; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Properties; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.shims.ShimLoader; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.OutputFormat; -import org.apache.hadoop.security.Credentials; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.common.HCatContext; -import org.apache.hcatalog.common.HCatException; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.apache.hcatalog.mapreduce.HCatOutputFormat; -import org.apache.hcatalog.mapreduce.OutputJobInfo; -import org.apache.pig.PigException; -import org.apache.pig.ResourceSchema; -import org.apache.pig.impl.logicalLayer.FrontendException; -import org.apache.pig.impl.logicalLayer.schema.Schema; -import org.apache.pig.impl.util.ObjectSerializer; -import org.apache.pig.impl.util.UDFContext; - -/** - * HCatStorer. 
- * - */ - -public class HCatStorer extends HCatBaseStorer { - - // Signature for wrapped storer, see comments in LoadFuncBasedInputDriver.initialize - final public static String INNER_SIGNATURE = "hcatstorer.inner.signature"; - final public static String INNER_SIGNATURE_PREFIX = "hcatstorer_inner_signature"; - // A hash map which stores job credentials. The key is a signature passed by Pig, which is - //unique to the store func and out file name (table, in our case). - private static Map jobCredentials = new HashMap(); - - - public HCatStorer(String partSpecs, String schema) throws Exception { - super(partSpecs, schema); - } - - public HCatStorer(String partSpecs) throws Exception { - this(partSpecs, null); - } - - public HCatStorer() throws Exception { - this(null, null); - } - - @Override - public OutputFormat getOutputFormat() throws IOException { - return new HCatOutputFormat(); - } - - @Override - public void setStoreLocation(String location, Job job) throws IOException { - HCatContext.INSTANCE.setConf(job.getConfiguration()).getConf().get() - .setBoolean(HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION, false); - - Configuration config = job.getConfiguration(); - config.set(INNER_SIGNATURE, INNER_SIGNATURE_PREFIX + "_" + sign); - Properties udfProps = UDFContext.getUDFContext().getUDFProperties( - this.getClass(), new String[]{sign}); - String[] userStr = location.split("\\."); - - if (udfProps.containsKey(HCatConstants.HCAT_PIG_STORER_LOCATION_SET)) { - for (Enumeration emr = udfProps.keys(); emr.hasMoreElements(); ) { - PigHCatUtil.getConfigFromUDFProperties(udfProps, config, emr.nextElement().toString()); - } - Credentials crd = jobCredentials.get(INNER_SIGNATURE_PREFIX + "_" + sign); - if (crd != null) { - job.getCredentials().addAll(crd); - } - } else { - Job clone = new Job(job.getConfiguration()); - OutputJobInfo outputJobInfo; - if (userStr.length == 2) { - outputJobInfo = OutputJobInfo.create(userStr[0], userStr[1], partitions); - } else if 
(userStr.length == 1) { - outputJobInfo = OutputJobInfo.create(null, userStr[0], partitions); - } else { - throw new FrontendException("location " + location - + " is invalid. It must be of the form [db.]table", - PigHCatUtil.PIG_EXCEPTION_CODE); - } - Schema schema = (Schema) ObjectSerializer.deserialize(udfProps.getProperty(PIG_SCHEMA)); - if (schema != null) { - pigSchema = schema; - } - if (pigSchema == null) { - throw new FrontendException( - "Schema for data cannot be determined.", - PigHCatUtil.PIG_EXCEPTION_CODE); - } - String externalLocation = (String) udfProps.getProperty(HCatConstants.HCAT_PIG_STORER_EXTERNAL_LOCATION); - if (externalLocation != null) { - outputJobInfo.setLocation(externalLocation); - } - try { - HCatOutputFormat.setOutput(job, outputJobInfo); - } catch (HCatException he) { - // pass the message to the user - essentially something about - // the table - // information passed to HCatOutputFormat was not right - throw new PigException(he.getMessage(), - PigHCatUtil.PIG_EXCEPTION_CODE, he); - } - HCatSchema hcatTblSchema = HCatOutputFormat.getTableSchema(job); - try { - doSchemaValidations(pigSchema, hcatTblSchema); - } catch (HCatException he) { - throw new FrontendException(he.getMessage(), PigHCatUtil.PIG_EXCEPTION_CODE, he); - } - computedSchema = convertPigSchemaToHCatSchema(pigSchema, hcatTblSchema); - HCatOutputFormat.setSchema(job, computedSchema); - udfProps.setProperty(COMPUTED_OUTPUT_SCHEMA, ObjectSerializer.serialize(computedSchema)); - - // We will store all the new /changed properties in the job in the - // udf context, so the the HCatOutputFormat.setOutput and setSchema - // methods need not be called many times. 
- for (Entry keyValue : job.getConfiguration()) { - String oldValue = clone.getConfiguration().getRaw(keyValue.getKey()); - if ((oldValue == null) || (keyValue.getValue().equals(oldValue) == false)) { - udfProps.put(keyValue.getKey(), keyValue.getValue()); - } - } - //Store credentials in a private hash map and not the udf context to - // make sure they are not public. - jobCredentials.put(INNER_SIGNATURE_PREFIX + "_" + sign, job.getCredentials()); - udfProps.put(HCatConstants.HCAT_PIG_STORER_LOCATION_SET, true); - } - } - - @Override - public void storeSchema(ResourceSchema schema, String arg1, Job job) throws IOException { - ShimLoader.getHadoopShims().getHCatShim().commitJob(getOutputFormat(), job); - } - - @Override - public void cleanupOnFailure(String location, Job job) throws IOException { - ShimLoader.getHadoopShims().getHCatShim().abortJob(getOutputFormat(), job); - } -} diff --git hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/PigHCatUtil.java hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/PigHCatUtil.java deleted file mode 100644 index 06f8471..0000000 --- hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hcatalog/pig/PigHCatUtil.java +++ /dev/null @@ -1,492 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.pig; - - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Properties; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; -import org.apache.hadoop.hive.metastore.MetaStoreUtils; -import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; -import org.apache.hadoop.hive.ql.metadata.Table; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.common.HCatException; -import org.apache.hcatalog.common.HCatUtil; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.hcatalog.data.Pair; -import org.apache.hcatalog.data.schema.HCatFieldSchema; -import org.apache.hcatalog.data.schema.HCatFieldSchema.Type; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.apache.pig.LoadPushDown.RequiredField; -import org.apache.pig.PigException; -import org.apache.pig.ResourceSchema; -import org.apache.pig.ResourceSchema.ResourceFieldSchema; -import org.apache.pig.data.DataBag; -import org.apache.pig.data.DataByteArray; -import org.apache.pig.data.DataType; -import org.apache.pig.data.DefaultDataBag; -import org.apache.pig.data.Tuple; -import org.apache.pig.data.TupleFactory; -import org.apache.pig.impl.logicalLayer.schema.Schema; -import org.apache.pig.impl.util.UDFContext; -import org.apache.pig.impl.util.Utils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -class PigHCatUtil { - - private static final Logger LOG = LoggerFactory.getLogger(PigHCatUtil.class); - - static final int PIG_EXCEPTION_CODE = 1115; // http://wiki.apache.org/pig/PigErrorHandlingFunctionalSpecification#Error_codes 
- private static final String DEFAULT_DB = MetaStoreUtils.DEFAULT_DATABASE_NAME; - - private final Map, Table> hcatTableCache = - new HashMap, Table>(); - - private static final TupleFactory tupFac = TupleFactory.getInstance(); - - private static boolean pigHasBooleanSupport = false; - - /** - * Determine if the current Pig version supports boolean columns. This works around a - * dependency conflict preventing HCatalog from requiring a version of Pig with boolean - * field support and should be removed once HCATALOG-466 has been resolved. - */ - static { - // DETAILS: - // - // PIG-1429 added support for boolean fields, which shipped in 0.10.0; - // this version of Pig depends on antlr 3.4. - // - // HCatalog depends heavily on Hive, which at this time uses antlr 3.0.1. - // - // antlr 3.0.1 and 3.4 are incompatible, so Pig 0.10.0 and Hive cannot be depended on in the - // same project. Pig 0.8.0 did not use antlr for its parser and can coexist with Hive, - // so that Pig version is depended on by HCatalog at this time. - try { - Schema schema = Utils.getSchemaFromString("myBooleanField: boolean"); - pigHasBooleanSupport = (schema.getField("myBooleanField").type == DataType.BOOLEAN); - } catch (Throwable e) { - // pass - } - - if (!pigHasBooleanSupport) { - LOG.info("This version of Pig does not support boolean fields. To enable " - + "boolean-to-integer conversion, set the " - + HCatConstants.HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER - + "=true configuration parameter."); - } - } - - static public boolean pigHasBooleanSupport(){ - return pigHasBooleanSupport; - } - - static public Pair getDBTableNames(String location) throws IOException { - // the location string will be of the form: - // .
- parse it and - // communicate the information to HCatInputFormat - - try { - return HCatUtil.getDbAndTableName(location); - } catch (IOException e) { - String locationErrMsg = "The input location in load statement " + - "should be of the form " + - ".
or
. Got " + location; - throw new PigException(locationErrMsg, PIG_EXCEPTION_CODE); - } - } - - static public String getHCatServerUri(Job job) { - - return job.getConfiguration().get(HiveConf.ConfVars.METASTOREURIS.varname); - } - - static public String getHCatServerPrincipal(Job job) { - - return job.getConfiguration().get(HCatConstants.HCAT_METASTORE_PRINCIPAL); - } - - private static HiveMetaStoreClient getHiveMetaClient(String serverUri, - String serverKerberosPrincipal, Class clazz) throws Exception { - HiveConf hiveConf = new HiveConf(clazz); - - if (serverUri != null) { - hiveConf.set("hive.metastore.local", "false"); - hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, serverUri.trim()); - } - - if (serverKerberosPrincipal != null) { - hiveConf.setBoolVar(HiveConf.ConfVars.METASTORE_USE_THRIFT_SASL, true); - hiveConf.setVar(HiveConf.ConfVars.METASTORE_KERBEROS_PRINCIPAL, serverKerberosPrincipal); - } - - try { - return HCatUtil.getHiveClient(hiveConf); - } catch (Exception e) { - throw new Exception("Could not instantiate a HiveMetaStoreClient connecting to server uri:[" + serverUri + "]", e); - } - } - - - HCatSchema getHCatSchema(List fields, String signature, Class classForUDFCLookup) throws IOException { - if (fields == null) { - return null; - } - - Properties props = UDFContext.getUDFContext().getUDFProperties( - classForUDFCLookup, new String[]{signature}); - HCatSchema hcatTableSchema = (HCatSchema) props.get(HCatConstants.HCAT_TABLE_SCHEMA); - - ArrayList fcols = new ArrayList(); - for (RequiredField rf : fields) { - fcols.add(hcatTableSchema.getFields().get(rf.getIndex())); - } - return new HCatSchema(fcols); - } - - public Table getTable(String location, String hcatServerUri, String hcatServerPrincipal) throws IOException { - Pair loc_server = new Pair(location, hcatServerUri); - Table hcatTable = hcatTableCache.get(loc_server); - if (hcatTable != null) { - return hcatTable; - } - - Pair dbTablePair = PigHCatUtil.getDBTableNames(location); - String 
dbName = dbTablePair.first; - String tableName = dbTablePair.second; - Table table = null; - HiveMetaStoreClient client = null; - try { - client = getHiveMetaClient(hcatServerUri, hcatServerPrincipal, PigHCatUtil.class); - table = HCatUtil.getTable(client, dbName, tableName); - } catch (NoSuchObjectException nsoe) { - throw new PigException("Table not found : " + nsoe.getMessage(), PIG_EXCEPTION_CODE); // prettier error messages to frontend - } catch (Exception e) { - throw new IOException(e); - } finally { - HCatUtil.closeHiveClientQuietly(client); - } - hcatTableCache.put(loc_server, table); - return table; - } - - public static ResourceSchema getResourceSchema(HCatSchema hcatSchema) throws IOException { - - List rfSchemaList = new ArrayList(); - for (HCatFieldSchema hfs : hcatSchema.getFields()) { - ResourceFieldSchema rfSchema; - rfSchema = getResourceSchemaFromFieldSchema(hfs); - rfSchemaList.add(rfSchema); - } - ResourceSchema rSchema = new ResourceSchema(); - rSchema.setFields(rfSchemaList.toArray(new ResourceFieldSchema[0])); - return rSchema; - - } - - private static ResourceFieldSchema getResourceSchemaFromFieldSchema(HCatFieldSchema hfs) - throws IOException { - ResourceFieldSchema rfSchema; - // if we are dealing with a bag or tuple column - need to worry about subschema - if (hfs.getType() == Type.STRUCT) { - rfSchema = new ResourceFieldSchema() - .setName(hfs.getName()) - .setDescription(hfs.getComment()) - .setType(getPigType(hfs)) - .setSchema(getTupleSubSchema(hfs)); - } else if (hfs.getType() == Type.ARRAY) { - rfSchema = new ResourceFieldSchema() - .setName(hfs.getName()) - .setDescription(hfs.getComment()) - .setType(getPigType(hfs)) - .setSchema(getBagSubSchema(hfs)); - } else { - rfSchema = new ResourceFieldSchema() - .setName(hfs.getName()) - .setDescription(hfs.getComment()) - .setType(getPigType(hfs)) - .setSchema(null); // no munging inner-schemas - } - return rfSchema; - } - - protected static ResourceSchema 
getBagSubSchema(HCatFieldSchema hfs) throws IOException { - // there are two cases - array and array> - // in either case the element type of the array is represented in a - // tuple field schema in the bag's field schema - the second case (struct) - // more naturally translates to the tuple - in the first case (array) - // we simulate the tuple by putting the single field in a tuple - - Properties props = UDFContext.getUDFContext().getClientSystemProps(); - String innerTupleName = HCatConstants.HCAT_PIG_INNER_TUPLE_NAME_DEFAULT; - if (props != null && props.containsKey(HCatConstants.HCAT_PIG_INNER_TUPLE_NAME)) { - innerTupleName = props.getProperty(HCatConstants.HCAT_PIG_INNER_TUPLE_NAME) - .replaceAll("FIELDNAME", hfs.getName()); - } - String innerFieldName = HCatConstants.HCAT_PIG_INNER_FIELD_NAME_DEFAULT; - if (props != null && props.containsKey(HCatConstants.HCAT_PIG_INNER_FIELD_NAME)) { - innerFieldName = props.getProperty(HCatConstants.HCAT_PIG_INNER_FIELD_NAME) - .replaceAll("FIELDNAME", hfs.getName()); - } - - ResourceFieldSchema[] bagSubFieldSchemas = new ResourceFieldSchema[1]; - bagSubFieldSchemas[0] = new ResourceFieldSchema().setName(innerTupleName) - .setDescription("The tuple in the bag") - .setType(DataType.TUPLE); - HCatFieldSchema arrayElementFieldSchema = hfs.getArrayElementSchema().get(0); - if (arrayElementFieldSchema.getType() == Type.STRUCT) { - bagSubFieldSchemas[0].setSchema(getTupleSubSchema(arrayElementFieldSchema)); - } else if (arrayElementFieldSchema.getType() == Type.ARRAY) { - ResourceSchema s = new ResourceSchema(); - List lrfs = Arrays.asList(getResourceSchemaFromFieldSchema(arrayElementFieldSchema)); - s.setFields(lrfs.toArray(new ResourceFieldSchema[0])); - bagSubFieldSchemas[0].setSchema(s); - } else { - ResourceFieldSchema[] innerTupleFieldSchemas = new ResourceFieldSchema[1]; - innerTupleFieldSchemas[0] = new ResourceFieldSchema().setName(innerFieldName) - .setDescription("The inner field in the tuple in the bag") - 
.setType(getPigType(arrayElementFieldSchema)) - .setSchema(null); // the element type is not a tuple - so no subschema - bagSubFieldSchemas[0].setSchema(new ResourceSchema().setFields(innerTupleFieldSchemas)); - } - ResourceSchema s = new ResourceSchema().setFields(bagSubFieldSchemas); - return s; - - } - - private static ResourceSchema getTupleSubSchema(HCatFieldSchema hfs) throws IOException { - // for each struct subfield, create equivalent ResourceFieldSchema - ResourceSchema s = new ResourceSchema(); - List lrfs = new ArrayList(); - for (HCatFieldSchema subField : hfs.getStructSubSchema().getFields()) { - lrfs.add(getResourceSchemaFromFieldSchema(subField)); - } - s.setFields(lrfs.toArray(new ResourceFieldSchema[0])); - return s; - } - - /** - * @param hfs the field schema of the column - * @return corresponding pig type - * @throws IOException - */ - static public byte getPigType(HCatFieldSchema hfs) throws IOException { - return getPigType(hfs.getType()); - } - - static public byte getPigType(Type type) throws IOException { - if (type == Type.STRING) { - return DataType.CHARARRAY; - } - - if ((type == Type.INT) || (type == Type.SMALLINT) || (type == Type.TINYINT)) { - return DataType.INTEGER; - } - - if (type == Type.ARRAY) { - return DataType.BAG; - } - - if (type == Type.STRUCT) { - return DataType.TUPLE; - } - - if (type == Type.MAP) { - return DataType.MAP; - } - - if (type == Type.BIGINT) { - return DataType.LONG; - } - - if (type == Type.FLOAT) { - return DataType.FLOAT; - } - - if (type == Type.DOUBLE) { - return DataType.DOUBLE; - } - - if (type == Type.BINARY) { - return DataType.BYTEARRAY; - } - - if (type == Type.BOOLEAN && pigHasBooleanSupport) { - return DataType.BOOLEAN; - } - - throw new PigException("HCatalog column type '" + type.toString() - + "' is not supported in Pig as a column type", PIG_EXCEPTION_CODE); - } - - public static Tuple transformToTuple(HCatRecord hr, HCatSchema hs) throws Exception { - if (hr == null) { - return null; - } 
- return transformToTuple(hr.getAll(), hs); - } - - @SuppressWarnings("unchecked") - public static Object extractPigObject(Object o, HCatFieldSchema hfs) throws Exception { - Object result; - Type itemType = hfs.getType(); - switch (itemType) { - case BINARY: - result = (o == null) ? null : new DataByteArray((byte[]) o); - break; - case STRUCT: - result = transformToTuple((List) o, hfs); - break; - case ARRAY: - result = transformToBag((List) o, hfs); - break; - case MAP: - result = transformToPigMap((Map) o, hfs); - break; - default: - result = o; - break; - } - return result; - } - - private static Tuple transformToTuple(List objList, HCatFieldSchema hfs) throws Exception { - try { - return transformToTuple(objList, hfs.getStructSubSchema()); - } catch (Exception e) { - if (hfs.getType() != Type.STRUCT) { - throw new Exception("Expected Struct type, got " + hfs.getType(), e); - } else { - throw e; - } - } - } - - private static Tuple transformToTuple(List objList, HCatSchema hs) throws Exception { - if (objList == null) { - return null; - } - Tuple t = tupFac.newTuple(objList.size()); - List subFields = hs.getFields(); - for (int i = 0; i < subFields.size(); i++) { - t.set(i, extractPigObject(objList.get(i), subFields.get(i))); - } - return t; - } - - private static Map transformToPigMap(Map map, HCatFieldSchema hfs) throws Exception { - if (map == null) { - return null; - } - - Map result = new HashMap(); - for (Entry entry : map.entrySet()) { - // since map key for Pig has to be Strings - result.put(entry.getKey().toString(), extractPigObject(entry.getValue(), hfs.getMapValueSchema().get(0))); - } - return result; - } - - @SuppressWarnings("unchecked") - private static DataBag transformToBag(List list, HCatFieldSchema hfs) throws Exception { - if (list == null) { - return null; - } - - HCatFieldSchema elementSubFieldSchema = hfs.getArrayElementSchema().getFields().get(0); - DataBag db = new DefaultDataBag(); - for (Object o : list) { - Tuple tuple; - if 
(elementSubFieldSchema.getType() == Type.STRUCT) { - tuple = transformToTuple((List) o, elementSubFieldSchema); - } else { - // bags always contain tuples - tuple = tupFac.newTuple(extractPigObject(o, elementSubFieldSchema)); - } - db.add(tuple); - } - return db; - } - - - private static void validateHCatSchemaFollowsPigRules(HCatSchema tblSchema) throws PigException { - for (HCatFieldSchema hcatField : tblSchema.getFields()) { - validateHcatFieldFollowsPigRules(hcatField); - } - } - - private static void validateHcatFieldFollowsPigRules(HCatFieldSchema hcatField) throws PigException { - try { - Type hType = hcatField.getType(); - switch (hType) { - case BOOLEAN: - if (!pigHasBooleanSupport) { - throw new PigException("Incompatible type found in HCat table schema: " - + hcatField, PigHCatUtil.PIG_EXCEPTION_CODE); - } - break; - case ARRAY: - validateHCatSchemaFollowsPigRules(hcatField.getArrayElementSchema()); - break; - case STRUCT: - validateHCatSchemaFollowsPigRules(hcatField.getStructSubSchema()); - break; - case MAP: - // key is only string - if (hcatField.getMapKeyType() != Type.STRING) { - LOG.info("Converting non-String key of map " + hcatField.getName() + " from " - + hcatField.getMapKeyType() + " to String."); - } - validateHCatSchemaFollowsPigRules(hcatField.getMapValueSchema()); - break; - } - } catch (HCatException e) { - throw new PigException("Incompatible type found in hcat table schema: " + hcatField, PigHCatUtil.PIG_EXCEPTION_CODE, e); - } - } - - - public static void validateHCatTableSchemaFollowsPigRules(HCatSchema hcatTableSchema) throws IOException { - validateHCatSchemaFollowsPigRules(hcatTableSchema); - } - - public static void getConfigFromUDFProperties(Properties p, Configuration config, String propName) { - if (p.getProperty(propName) != null) { - config.set(propName, p.getProperty(propName)); - } - } - - public static void saveConfigIntoUDFProperties(Properties p, Configuration config, String propName) { - if (config.get(propName) != 
null) { - p.setProperty(propName, config.get(propName)); - } - } - -} diff --git hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatBaseLoader.java hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatBaseLoader.java new file mode 100644 index 0000000..b92131f --- /dev/null +++ hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatBaseLoader.java @@ -0,0 +1,153 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.hive.hcatalog.pig; + +import java.io.IOException; +import java.util.Arrays; +import java.util.List; +import java.util.Properties; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.RecordReader; +import org.apache.hive.hcatalog.data.HCatRecord; +import org.apache.hive.hcatalog.data.schema.HCatSchema; +import org.apache.hive.hcatalog.mapreduce.InputJobInfo; +import org.apache.hive.hcatalog.mapreduce.PartInfo; +import org.apache.pig.LoadFunc; +import org.apache.pig.LoadMetadata; +import org.apache.pig.LoadPushDown; +import org.apache.pig.PigException; +import org.apache.pig.ResourceStatistics; +import org.apache.pig.backend.executionengine.ExecException; +import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigSplit; +import org.apache.pig.data.Tuple; +import org.apache.pig.impl.logicalLayer.FrontendException; +import org.apache.pig.impl.util.UDFContext; + +/** + * Base class for HCatLoader and HCatEximLoader + */ + +abstract class HCatBaseLoader extends LoadFunc implements LoadMetadata, LoadPushDown { + + protected static final String PRUNE_PROJECTION_INFO = "prune.projection.info"; + + private RecordReader reader; + protected String signature; + + HCatSchema outputSchema = null; + + + @Override + public Tuple getNext() throws IOException { + try { + HCatRecord hr = (HCatRecord) (reader.nextKeyValue() ? reader.getCurrentValue() : null); + Tuple t = PigHCatUtil.transformToTuple(hr, outputSchema); + // TODO : we were discussing an iter interface, and also a LazyTuple + // change this when plans for that solidifies. 
+ return t; + } catch (ExecException e) { + int errCode = 6018; + String errMsg = "Error while reading input"; + throw new ExecException(errMsg, errCode, + PigException.REMOTE_ENVIRONMENT, e); + } catch (Exception eOther) { + int errCode = 6018; + String errMsg = "Error converting read value to tuple"; + throw new ExecException(errMsg, errCode, + PigException.REMOTE_ENVIRONMENT, eOther); + } + + } + + @Override + public void prepareToRead(RecordReader reader, PigSplit arg1) throws IOException { + this.reader = reader; + } + + @Override + public ResourceStatistics getStatistics(String location, Job job) throws IOException { + // statistics not implemented currently + return null; + } + + @Override + public List getFeatures() { + return Arrays.asList(LoadPushDown.OperatorSet.PROJECTION); + } + + @Override + public RequiredFieldResponse pushProjection(RequiredFieldList requiredFieldsInfo) throws FrontendException { + // Store the required fields information in the UDFContext so that we + // can retrieve it later. + storeInUDFContext(signature, PRUNE_PROJECTION_INFO, requiredFieldsInfo); + + // HCat will always prune columns based on what we ask of it - so the + // response is true + return new RequiredFieldResponse(true); + } + + @Override + public void setUDFContextSignature(String signature) { + this.signature = signature; + } + + + // helper methods + protected void storeInUDFContext(String signature, String key, Object value) { + UDFContext udfContext = UDFContext.getUDFContext(); + Properties props = udfContext.getUDFProperties( + this.getClass(), new String[]{signature}); + props.put(key, value); + } + + /** + * A utility method to get the size of inputs. This is accomplished by summing the + * size of all input paths on supported FileSystems. Locations whose size cannot be + * determined are ignored. Note non-FileSystem and unpartitioned locations will not + * report their input size by default. 
+ */ + protected static long getSizeInBytes(InputJobInfo inputJobInfo) throws IOException { + Configuration conf = new Configuration(); + long sizeInBytes = 0; + + for (PartInfo partInfo : inputJobInfo.getPartitions()) { + try { + Path p = new Path(partInfo.getLocation()); + if (p.getFileSystem(conf).isFile(p)) { + sizeInBytes += p.getFileSystem(conf).getFileStatus(p).getLen(); + } else { + FileStatus[] fileStatuses = p.getFileSystem(conf).listStatus(p); + if (fileStatuses != null) { + for (FileStatus child : fileStatuses) { + sizeInBytes += child.getLen(); + } + } + } + } catch (IOException e) { + // Report size to the extent possible. + } + } + + return sizeInBytes; + } +} diff --git hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatBaseStorer.java hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatBaseStorer.java new file mode 100644 index 0000000..67b1999 --- /dev/null +++ hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatBaseStorer.java @@ -0,0 +1,470 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hive.hcatalog.pig; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; + +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.WritableComparable; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.RecordWriter; +import org.apache.hive.hcatalog.common.HCatException; +import org.apache.hive.hcatalog.data.DefaultHCatRecord; +import org.apache.hive.hcatalog.data.HCatRecord; +import org.apache.hive.hcatalog.data.schema.HCatFieldSchema; +import org.apache.hive.hcatalog.data.schema.HCatFieldSchema.Type; +import org.apache.hive.hcatalog.data.schema.HCatSchema; +import org.apache.pig.ResourceSchema; +import org.apache.pig.ResourceStatistics; +import org.apache.pig.StoreFunc; +import org.apache.pig.StoreMetadata; +import org.apache.pig.backend.BackendException; +import org.apache.pig.data.DataBag; +import org.apache.pig.data.DataByteArray; +import org.apache.pig.data.DataType; +import org.apache.pig.data.Tuple; +import org.apache.pig.impl.logicalLayer.FrontendException; +import org.apache.pig.impl.logicalLayer.schema.Schema; +import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema; +import org.apache.pig.impl.util.ObjectSerializer; +import org.apache.pig.impl.util.UDFContext; +import org.apache.pig.impl.util.Utils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.collect.Lists; + +/** + * Base class for HCatStorer and HCatEximStorer + * + */ + +abstract class HCatBaseStorer extends StoreFunc implements StoreMetadata { + + private static final Logger LOG = LoggerFactory.getLogger( HCatBaseStorer.class ); + + private static final List SUPPORTED_INTEGER_CONVERSIONS = + Lists.newArrayList(Type.TINYINT, Type.SMALLINT, Type.INT); + protected static final String COMPUTED_OUTPUT_SCHEMA = "hcat.output.schema"; + protected final List 
partitionKeys; + protected final Map partitions; + protected Schema pigSchema; + private RecordWriter, HCatRecord> writer; + protected HCatSchema computedSchema; + protected static final String PIG_SCHEMA = "hcat.pig.store.schema"; + protected String sign; + + public HCatBaseStorer(String partSpecs, String schema) throws Exception { + + partitionKeys = new ArrayList(); + partitions = new HashMap(); + if (partSpecs != null && !partSpecs.trim().isEmpty()) { + String[] partKVPs = partSpecs.split(","); + for (String partKVP : partKVPs) { + String[] partKV = partKVP.split("="); + if (partKV.length == 2) { + String partKey = partKV[0].trim(); + partitionKeys.add(partKey); + partitions.put(partKey, partKV[1].trim()); + } else { + throw new FrontendException("Invalid partition column specification. " + partSpecs, PigHCatUtil.PIG_EXCEPTION_CODE); + } + } + } + + if (schema != null) { + pigSchema = Utils.getSchemaFromString(schema); + } + + } + + @Override + public void checkSchema(ResourceSchema resourceSchema) throws IOException { + + /* Schema provided by user and the schema computed by Pig + * at the time of calling store must match. + */ + Schema runtimeSchema = Schema.getPigSchema(resourceSchema); + if (pigSchema != null) { + if (!Schema.equals(runtimeSchema, pigSchema, false, true)) { + throw new FrontendException("Schema provided in store statement doesn't match with the Schema" + + "returned by Pig run-time. Schema provided in HCatStorer: " + pigSchema.toString() + " Schema received from Pig runtime: " + runtimeSchema.toString(), PigHCatUtil.PIG_EXCEPTION_CODE); + } + } else { + pigSchema = runtimeSchema; + } + UDFContext.getUDFContext().getUDFProperties(this.getClass(), new String[]{sign}).setProperty(PIG_SCHEMA, ObjectSerializer.serialize(pigSchema)); + } + + /** Constructs HCatSchema from pigSchema. Passed tableSchema is the existing + * schema of the table in metastore. 
+ */ + protected HCatSchema convertPigSchemaToHCatSchema(Schema pigSchema, HCatSchema tableSchema) throws FrontendException { + List fieldSchemas = new ArrayList(pigSchema.size()); + for (FieldSchema fSchema : pigSchema.getFields()) { + try { + HCatFieldSchema hcatFieldSchema = getColFromSchema(fSchema.alias, tableSchema); + + fieldSchemas.add(getHCatFSFromPigFS(fSchema, hcatFieldSchema)); + } catch (HCatException he) { + throw new FrontendException(he.getMessage(), PigHCatUtil.PIG_EXCEPTION_CODE, he); + } + } + return new HCatSchema(fieldSchemas); + } + + public static boolean removeTupleFromBag(HCatFieldSchema hcatFieldSchema, FieldSchema bagFieldSchema) throws HCatException { + if (hcatFieldSchema != null && hcatFieldSchema.getArrayElementSchema().get(0).getType() != Type.STRUCT) { + return true; + } + // Column was not found in table schema. Its a new column + List tupSchema = bagFieldSchema.schema.getFields(); + if (hcatFieldSchema == null && tupSchema.size() == 1 && (tupSchema.get(0).schema == null || (tupSchema.get(0).type == DataType.TUPLE && tupSchema.get(0).schema.size() == 1))) { + return true; + } + return false; + } + + + private HCatFieldSchema getHCatFSFromPigFS(FieldSchema fSchema, HCatFieldSchema hcatFieldSchema) throws FrontendException, HCatException { + byte type = fSchema.type; + switch (type) { + + case DataType.CHARARRAY: + case DataType.BIGCHARARRAY: + return new HCatFieldSchema(fSchema.alias, Type.STRING, null); + + case DataType.INTEGER: + if (hcatFieldSchema != null) { + if (!SUPPORTED_INTEGER_CONVERSIONS.contains(hcatFieldSchema.getType())) { + throw new FrontendException("Unsupported type: " + type + " in Pig's schema", + PigHCatUtil.PIG_EXCEPTION_CODE); + } + return new HCatFieldSchema(fSchema.alias, hcatFieldSchema.getType(), null); + } else { + return new HCatFieldSchema(fSchema.alias, Type.INT, null); + } + + case DataType.LONG: + return new HCatFieldSchema(fSchema.alias, Type.BIGINT, null); + + case DataType.FLOAT: + return new 
HCatFieldSchema(fSchema.alias, Type.FLOAT, null); + + case DataType.DOUBLE: + return new HCatFieldSchema(fSchema.alias, Type.DOUBLE, null); + + case DataType.BYTEARRAY: + return new HCatFieldSchema(fSchema.alias, Type.BINARY, null); + + case DataType.BOOLEAN: + return new HCatFieldSchema(fSchema.alias, Type.BOOLEAN, null); + + case DataType.BAG: + Schema bagSchema = fSchema.schema; + List arrFields = new ArrayList(1); + FieldSchema field; + // Find out if we need to throw away the tuple or not. + if (removeTupleFromBag(hcatFieldSchema, fSchema)) { + field = bagSchema.getField(0).schema.getField(0); + } else { + field = bagSchema.getField(0); + } + arrFields.add(getHCatFSFromPigFS(field, hcatFieldSchema == null ? null : hcatFieldSchema.getArrayElementSchema().get(0))); + return new HCatFieldSchema(fSchema.alias, Type.ARRAY, new HCatSchema(arrFields), ""); + + case DataType.TUPLE: + List fieldNames = new ArrayList(); + List hcatFSs = new ArrayList(); + HCatSchema structSubSchema = hcatFieldSchema == null ? null : hcatFieldSchema.getStructSubSchema(); + List fields = fSchema.schema.getFields(); + for (int i = 0; i < fields.size(); i++) { + FieldSchema fieldSchema = fields.get(i); + fieldNames.add(fieldSchema.alias); + hcatFSs.add(getHCatFSFromPigFS(fieldSchema, structSubSchema == null ? null : structSubSchema.get(i))); + } + return new HCatFieldSchema(fSchema.alias, Type.STRUCT, new HCatSchema(hcatFSs), ""); + + case DataType.MAP: { + // Pig's schema contain no type information about map's keys and + // values. So, if its a new column assume if its existing + // return whatever is contained in the existing column. + + HCatFieldSchema valFS; + List valFSList = new ArrayList(1); + + if (hcatFieldSchema != null) { + return new HCatFieldSchema(fSchema.alias, Type.MAP, Type.STRING, hcatFieldSchema.getMapValueSchema(), ""); + } + + // Column not found in target table. Its a new column. 
Its schema is map + valFS = new HCatFieldSchema(fSchema.alias, Type.STRING, ""); + valFSList.add(valFS); + return new HCatFieldSchema(fSchema.alias, Type.MAP, Type.STRING, new HCatSchema(valFSList), ""); + } + + default: + throw new FrontendException("Unsupported type: " + type + " in Pig's schema", PigHCatUtil.PIG_EXCEPTION_CODE); + } + } + + @Override + public void prepareToWrite(RecordWriter writer) throws IOException { + this.writer = writer; + computedSchema = (HCatSchema) ObjectSerializer.deserialize(UDFContext.getUDFContext().getUDFProperties(this.getClass(), new String[]{sign}).getProperty(COMPUTED_OUTPUT_SCHEMA)); + } + + @Override + public void putNext(Tuple tuple) throws IOException { + + List outgoing = new ArrayList(tuple.size()); + + int i = 0; + for (HCatFieldSchema fSchema : computedSchema.getFields()) { + outgoing.add(getJavaObj(tuple.get(i++), fSchema)); + } + try { + writer.write(null, new DefaultHCatRecord(outgoing)); + } catch (InterruptedException e) { + throw new BackendException("Error while writing tuple: " + tuple, PigHCatUtil.PIG_EXCEPTION_CODE, e); + } + } + + private Object getJavaObj(Object pigObj, HCatFieldSchema hcatFS) throws HCatException, BackendException { + try { + + // The real work-horse. Spend time and energy in this method if there is + // need to keep HCatStorer lean and go fast. + Type type = hcatFS.getType(); + switch (type) { + + case BINARY: + if (pigObj == null) { + return null; + } + return ((DataByteArray) pigObj).get(); + + case STRUCT: + if (pigObj == null) { + return null; + } + HCatSchema structSubSchema = hcatFS.getStructSubSchema(); + // Unwrap the tuple. + List all = ((Tuple) pigObj).getAll(); + ArrayList converted = new ArrayList(all.size()); + for (int i = 0; i < all.size(); i++) { + converted.add(getJavaObj(all.get(i), structSubSchema.get(i))); + } + return converted; + + case ARRAY: + if (pigObj == null) { + return null; + } + // Unwrap the bag. 
+ DataBag pigBag = (DataBag) pigObj; + HCatFieldSchema tupFS = hcatFS.getArrayElementSchema().get(0); + boolean needTuple = tupFS.getType() == Type.STRUCT; + List bagContents = new ArrayList((int) pigBag.size()); + Iterator bagItr = pigBag.iterator(); + + while (bagItr.hasNext()) { + // If there is only one element in tuple contained in bag, we throw away the tuple. + bagContents.add(getJavaObj(needTuple ? bagItr.next() : bagItr.next().get(0), tupFS)); + + } + return bagContents; + case MAP: + if (pigObj == null) { + return null; + } + Map pigMap = (Map) pigObj; + Map typeMap = new HashMap(); + for (Entry entry : pigMap.entrySet()) { + // the value has a schema and not a FieldSchema + typeMap.put( + // Schema validation enforces that the Key is a String + (String) entry.getKey(), + getJavaObj(entry.getValue(), hcatFS.getMapValueSchema().get(0))); + } + return typeMap; + case STRING: + case INT: + case BIGINT: + case FLOAT: + case DOUBLE: + return pigObj; + case SMALLINT: + if (pigObj == null) { + return null; + } + if ((Integer) pigObj < Short.MIN_VALUE || (Integer) pigObj > Short.MAX_VALUE) { + throw new BackendException("Value " + pigObj + " is outside the bounds of column " + + hcatFS.getName() + " with type " + hcatFS.getType(), PigHCatUtil.PIG_EXCEPTION_CODE); + } + return ((Integer) pigObj).shortValue(); + case TINYINT: + if (pigObj == null) { + return null; + } + if ((Integer) pigObj < Byte.MIN_VALUE || (Integer) pigObj > Byte.MAX_VALUE) { + throw new BackendException("Value " + pigObj + " is outside the bounds of column " + + hcatFS.getName() + " with type " + hcatFS.getType(), PigHCatUtil.PIG_EXCEPTION_CODE); + } + return ((Integer) pigObj).byteValue(); + case BOOLEAN: + if (pigObj == null) { + LOG.debug( "HCatBaseStorer.getJavaObj(BOOLEAN): obj null, bailing early" ); + return null; + } + + if( pigObj instanceof String ) { + if( ((String)pigObj).trim().compareTo("0") == 0 ) { + return Boolean.FALSE; + } + if( ((String)pigObj).trim().compareTo("1") == 0 ) 
{ + return Boolean.TRUE; + } + + throw new BackendException( + "Unexpected type " + type + " for value " + pigObj + + (pigObj == null ? "" : " of class " + + pigObj.getClass().getName()), PigHCatUtil.PIG_EXCEPTION_CODE); + } + + return Boolean.parseBoolean( pigObj.toString() ); + default: + throw new BackendException("Unexpected type " + type + " for value " + pigObj + + (pigObj == null ? "" : " of class " + + pigObj.getClass().getName()), PigHCatUtil.PIG_EXCEPTION_CODE); + } + } catch (BackendException e) { + // provide the path to the field in the error message + throw new BackendException( + (hcatFS.getName() == null ? " " : hcatFS.getName() + ".") + e.getMessage(), + e.getCause() == null ? e : e.getCause()); + } + } + + @Override + public String relToAbsPathForStoreLocation(String location, Path curDir) throws IOException { + + // Need to necessarily override this method since default impl assumes HDFS + // based location string. + return location; + } + + @Override + public void setStoreFuncUDFContextSignature(String signature) { + sign = signature; + } + + + protected void doSchemaValidations(Schema pigSchema, HCatSchema tblSchema) throws FrontendException, HCatException { + + // Iterate through all the elements in Pig Schema and do validations as + // dictated by semantics, consult HCatSchema of table when need be. 
+ + for (FieldSchema pigField : pigSchema.getFields()) { + HCatFieldSchema hcatField = getColFromSchema(pigField.alias, tblSchema); + validateSchema(pigField, hcatField); + } + + try { + PigHCatUtil.validateHCatTableSchemaFollowsPigRules(tblSchema); + } catch (IOException e) { + throw new FrontendException("HCatalog schema is not compatible with Pig: " + e.getMessage(), PigHCatUtil.PIG_EXCEPTION_CODE, e); + } + } + + + private void validateSchema(FieldSchema pigField, HCatFieldSchema hcatField) + throws HCatException, FrontendException { + validateAlias(pigField.alias); + byte type = pigField.type; + if (DataType.isComplex(type)) { + switch (type) { + + case DataType.MAP: + if (hcatField != null) { + if (hcatField.getMapKeyType() != Type.STRING) { + throw new FrontendException("Key Type of map must be String " + hcatField, PigHCatUtil.PIG_EXCEPTION_CODE); + } + // Map values can be primitive or complex + } + break; + + case DataType.BAG: + HCatSchema arrayElementSchema = hcatField == null ? null : hcatField.getArrayElementSchema(); + for (FieldSchema innerField : pigField.schema.getField(0).schema.getFields()) { + validateSchema(innerField, getColFromSchema(pigField.alias, arrayElementSchema)); + } + break; + + case DataType.TUPLE: + HCatSchema structSubSchema = hcatField == null ? null : hcatField.getStructSubSchema(); + for (FieldSchema innerField : pigField.schema.getFields()) { + validateSchema(innerField, getColFromSchema(pigField.alias, structSubSchema)); + } + break; + + default: + throw new FrontendException("Internal Error.", PigHCatUtil.PIG_EXCEPTION_CODE); + } + } + } + + private void validateAlias(String alias) throws FrontendException { + if (alias == null) { + throw new FrontendException("Column name for a field is not specified. Please provide the full schema as an argument to HCatStorer.", PigHCatUtil.PIG_EXCEPTION_CODE); + } + if (alias.matches(".*[A-Z]+.*")) { + throw new FrontendException("Column names should all be in lowercase. 
Invalid name found: " + alias, PigHCatUtil.PIG_EXCEPTION_CODE); + } + } + + // Finds column by name in HCatSchema, if not found returns null. + private HCatFieldSchema getColFromSchema(String alias, HCatSchema tblSchema) { + if (tblSchema != null) { + for (HCatFieldSchema hcatField : tblSchema.getFields()) { + if (hcatField != null && hcatField.getName() != null && hcatField.getName().equalsIgnoreCase(alias)) { + return hcatField; + } + } + } + // Its a new column + return null; + } + + @Override + public void cleanupOnFailure(String location, Job job) throws IOException { + // No-op. + } + + @Override + public void storeStatistics(ResourceStatistics stats, String arg1, Job job) throws IOException { + } +} diff --git hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatEximLoader.java.broken hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatEximLoader.java.broken new file mode 100644 index 0000000..33824b8 --- /dev/null +++ hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatEximLoader.java.broken @@ -0,0 +1,129 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hcatalog.pig; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Properties; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hcatalog.data.schema.HCatFieldSchema; +import org.apache.hcatalog.data.schema.HCatSchema; +import org.apache.hcatalog.mapreduce.HCatBaseInputFormat; +import org.apache.hcatalog.mapreduce.HCatEximInputFormat; +import org.apache.hadoop.mapreduce.InputFormat; +import org.apache.hadoop.mapreduce.Job; +import org.apache.pig.Expression; +import org.apache.pig.LoadFunc; +import org.apache.pig.ResourceSchema; +import org.apache.pig.impl.util.UDFContext; + +/** + * Pig {@link LoadFunc} to read data/metadata from hcatalog exported location + */ + +public class HCatEximLoader extends HCatBaseLoader { + + private static final Log LOG = LogFactory.getLog(HCatEximLoader.class); + + private HCatSchema tableSchema; + private HCatSchema partitionSchema; + private HCatEximInputFormat inputFormat; + + public HCatEximLoader() { + LOG.debug("HCatEximLoader ctored"); + } + + @Override + public ResourceSchema getSchema(String location, Job job) throws IOException { + LOG.debug("getSchema with location :" + location); + if (tableSchema == null) { + List rv = HCatEximInputFormat.setInput(job, location, null); + tableSchema = rv.get(0); + partitionSchema = rv.get(1); + } + LOG.debug("getSchema got schema :" + tableSchema.toString()); + List colsPlusPartKeys = new ArrayList(); + colsPlusPartKeys.addAll(tableSchema.getFields()); + colsPlusPartKeys.addAll(partitionSchema.getFields()); + outputSchema = new HCatSchema(colsPlusPartKeys); + return PigHCatUtil.getResourceSchema(outputSchema); + } + + @Override + public String[] getPartitionKeys(String location, Job job) throws IOException { + LOG.warn("getPartitionKeys with location :" + location); + /* + if (tableSchema == null) { + List rv = HCatEximInputFormat.setInput(job, location, 
null); + tableSchema = rv.get(0); + partitionSchema = rv.get(1); + } + return partitionSchema.getFieldNames().toArray(new String[0]); + */ + return null; + } + + @Override + public void setPartitionFilter(Expression partitionFilter) throws IOException { + LOG.debug("setPartitionFilter with filter :" + partitionFilter.toString()); + } + + @Override + public void setLocation(String location, Job job) throws IOException { + LOG.debug("setLocation with location :" + location); + List rv = HCatEximInputFormat.setInput(job, location, null); + tableSchema = rv.get(0); + partitionSchema = rv.get(1); + List colsPlusPartKeys = new ArrayList(); + colsPlusPartKeys.addAll(tableSchema.getFields()); + colsPlusPartKeys.addAll(partitionSchema.getFields()); + outputSchema = new HCatSchema(colsPlusPartKeys); + UDFContext udfContext = UDFContext.getUDFContext(); + Properties props = udfContext.getUDFProperties(this.getClass(), + new String[] {signature}); + RequiredFieldList requiredFieldsInfo = + (RequiredFieldList) props.get(PRUNE_PROJECTION_INFO); + if (requiredFieldsInfo != null) { + ArrayList fcols = new ArrayList(); + for (RequiredField rf : requiredFieldsInfo.getFields()) { + fcols.add(tableSchema.getFields().get(rf.getIndex())); + } + outputSchema = new HCatSchema(fcols); + try { + HCatBaseInputFormat.setOutputSchema(job, outputSchema); + } catch (Exception e) { + throw new IOException(e); + } + } + } + + + @Override + public InputFormat getInputFormat() throws IOException { + if (inputFormat == null) { + inputFormat = new HCatEximInputFormat(); + } + return inputFormat; + } + +} diff --git hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatEximStorer.java.broken hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatEximStorer.java.broken new file mode 100644 index 0000000..3e21cc7 --- /dev/null +++ hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatEximStorer.java.broken @@ -0,0 +1,152 @@ +/** + * 
Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hcatalog.pig; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Properties; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hcatalog.common.HCatConstants; +import org.apache.hcatalog.common.HCatException; +import org.apache.hcatalog.common.HCatUtil; +import org.apache.hcatalog.data.schema.HCatFieldSchema; +import org.apache.hcatalog.data.schema.HCatSchema; +import org.apache.hcatalog.mapreduce.HCatEximOutputCommitter; +import org.apache.hcatalog.mapreduce.HCatEximOutputFormat; +import org.apache.hadoop.hive.metastore.MetaStoreUtils; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.OutputFormat; +import org.apache.pig.ResourceSchema; +import org.apache.pig.impl.logicalLayer.FrontendException; +import org.apache.pig.impl.logicalLayer.schema.Schema; +import org.apache.pig.impl.util.ObjectSerializer; +import org.apache.pig.impl.util.UDFContext; + +/** + * HCatEximStorer. 
+ * + */ + +public class HCatEximStorer extends HCatBaseStorer { + + private static final Log LOG = LogFactory.getLog(HCatEximStorer.class); + + private final String outputLocation; + + public HCatEximStorer(String outputLocation) throws Exception { + this(outputLocation, null, null); + } + + public HCatEximStorer(String outputLocation, String partitionSpec) throws Exception { + this(outputLocation, partitionSpec, null); + } + + public HCatEximStorer(String outputLocation, String partitionSpec, String schema) + throws Exception { + super(partitionSpec, schema); + this.outputLocation = outputLocation; + LOG.debug("HCatEximStorer called"); + } + + @Override + public OutputFormat getOutputFormat() throws IOException { + LOG.debug("getOutputFormat called"); + return new HCatEximOutputFormat(); + } + + @Override + public void setStoreLocation(String location, Job job) throws IOException { + LOG.debug("setStoreLocation called with :" + location); + String[] userStr = location.split("\\."); + String dbname = MetaStoreUtils.DEFAULT_DATABASE_NAME; + String tablename = null; + if (userStr.length == 2) { + dbname = userStr[0]; + tablename = userStr[1]; + } else { + tablename = userStr[0]; + } + Properties p = UDFContext.getUDFContext() + .getUDFProperties(this.getClass(), new String[] {sign}); + Configuration config = job.getConfiguration(); + if (!HCatUtil.checkJobContextIfRunningFromBackend(job)) { + Schema schema = (Schema) ObjectSerializer.deserialize(p.getProperty(PIG_SCHEMA)); + if (schema != null) { + pigSchema = schema; + } + if (pigSchema == null) { + throw new FrontendException("Schema for data cannot be determined.", + PigHCatUtil.PIG_EXCEPTION_CODE); + } + HCatSchema hcatTblSchema = new HCatSchema(new ArrayList()); + try { + doSchemaValidations(pigSchema, hcatTblSchema); + } catch (HCatException he) { + throw new FrontendException(he.getMessage(), PigHCatUtil.PIG_EXCEPTION_CODE, he); + } + + List hcatFields = new ArrayList(); + List partVals = new ArrayList(); + 
for (String key : partitionKeys) { + hcatFields.add(new HCatFieldSchema(key, HCatFieldSchema.Type.STRING, "")); + partVals.add(partitions.get(key)); + } + + HCatSchema outputSchema = convertPigSchemaToHCatSchema(pigSchema, + hcatTblSchema); + LOG.debug("Pig Schema '" + pigSchema.toString() + "' was converted to HCatSchema '" + + outputSchema); + HCatEximOutputFormat.setOutput(job, + dbname, tablename, + outputLocation, + new HCatSchema(hcatFields), + partVals, + outputSchema); + p.setProperty(COMPUTED_OUTPUT_SCHEMA, ObjectSerializer.serialize(outputSchema)); + p.setProperty(HCatConstants.HCAT_KEY_OUTPUT_INFO, + config.get(HCatConstants.HCAT_KEY_OUTPUT_INFO)); + if (config.get(HCatConstants.HCAT_KEY_HIVE_CONF) != null) { + p.setProperty(HCatConstants.HCAT_KEY_HIVE_CONF, + config.get(HCatConstants.HCAT_KEY_HIVE_CONF)); + } + } else { + config.set(HCatConstants.HCAT_KEY_OUTPUT_INFO, + p.getProperty(HCatConstants.HCAT_KEY_OUTPUT_INFO)); + if (p.getProperty(HCatConstants.HCAT_KEY_HIVE_CONF) != null) { + config.set(HCatConstants.HCAT_KEY_HIVE_CONF, + p.getProperty(HCatConstants.HCAT_KEY_HIVE_CONF)); + } + } + } + + @Override + public void storeSchema(ResourceSchema schema, String arg1, Job job) throws IOException { + if( job.getConfiguration().get("mapred.job.tracker", "").equalsIgnoreCase("local") ) { + //In local mode, mapreduce will not call OutputCommitter.cleanupJob. + //Calling it from here so that the partition publish happens. + //This call needs to be removed after MAPREDUCE-1447 is fixed. 
+ new HCatEximOutputCommitter(job,null).cleanupJob(job); + } + } +} diff --git hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatLoader.java hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatLoader.java new file mode 100644 index 0000000..5d01a7e --- /dev/null +++ hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatLoader.java @@ -0,0 +1,273 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.hive.hcatalog.pig; + +import java.io.IOException; +import java.util.Enumeration; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Properties; + +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.mapreduce.InputFormat; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.security.Credentials; +import org.apache.hive.hcatalog.common.HCatConstants; +import org.apache.hive.hcatalog.common.HCatContext; +import org.apache.hive.hcatalog.common.HCatUtil; +import org.apache.hive.hcatalog.data.Pair; +import org.apache.hive.hcatalog.data.schema.HCatSchema; +import org.apache.hive.hcatalog.mapreduce.HCatInputFormat; +import org.apache.hive.hcatalog.mapreduce.InputJobInfo; +import org.apache.pig.Expression; +import org.apache.pig.Expression.BinaryExpression; +import org.apache.pig.PigException; +import org.apache.pig.ResourceSchema; +import org.apache.pig.ResourceStatistics; +import org.apache.pig.impl.util.UDFContext; + +/** + * Pig {@link org.apache.pig.LoadFunc} to read data from HCat + */ + +public class HCatLoader extends HCatBaseLoader { + + private static final String PARTITION_FILTER = "partition.filter"; // for future use + + private HCatInputFormat hcatInputFormat = null; + private String dbName; + private String tableName; + private String hcatServerUri; + private String partitionFilterString; + private final PigHCatUtil phutil = new PigHCatUtil(); + + // Signature for wrapped loader, see comments in LoadFuncBasedInputDriver.initialize + final public static String INNER_SIGNATURE = "hcatloader.inner.signature"; + final public static String INNER_SIGNATURE_PREFIX = "hcatloader_inner_signature"; + // A hash map which stores job credentials. 
The key is a signature passed by Pig, which is + //unique to the load func and input file name (table, in our case). + private static Map jobCredentials = new HashMap(); + + @Override + public InputFormat getInputFormat() throws IOException { + if (hcatInputFormat == null) { + hcatInputFormat = new HCatInputFormat(); + } + return hcatInputFormat; + } + + @Override + public String relativeToAbsolutePath(String location, Path curDir) throws IOException { + return location; + } + + @Override + public void setLocation(String location, Job job) throws IOException { + HCatContext.INSTANCE.setConf(job.getConfiguration()).getConf().get() + .setBoolean(HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION, true); + + UDFContext udfContext = UDFContext.getUDFContext(); + Properties udfProps = udfContext.getUDFProperties(this.getClass(), + new String[]{signature}); + job.getConfiguration().set(INNER_SIGNATURE, INNER_SIGNATURE_PREFIX + "_" + signature); + Pair dbTablePair = PigHCatUtil.getDBTableNames(location); + dbName = dbTablePair.first; + tableName = dbTablePair.second; + + RequiredFieldList requiredFieldsInfo = (RequiredFieldList) udfProps + .get(PRUNE_PROJECTION_INFO); + // get partitionFilterString stored in the UDFContext - it would have + // been stored there by an earlier call to setPartitionFilter + // call setInput on HCatInputFormat only in the frontend because internally + // it makes calls to the hcat server - we don't want these to happen in + // the backend + // in the hadoop front end mapred.task.id property will not be set in + // the Configuration + if (udfProps.containsKey(HCatConstants.HCAT_PIG_LOADER_LOCATION_SET)) { + for (Enumeration emr = udfProps.keys(); emr.hasMoreElements(); ) { + PigHCatUtil.getConfigFromUDFProperties(udfProps, + job.getConfiguration(), emr.nextElement().toString()); + } + if (!HCatUtil.checkJobContextIfRunningFromBackend(job)) { + //Combine credentials and credentials from job takes precedence for freshness + Credentials crd = 
jobCredentials.get(INNER_SIGNATURE_PREFIX + "_" + signature); + crd.addAll(job.getCredentials()); + job.getCredentials().addAll(crd); + } + } else { + Job clone = new Job(job.getConfiguration()); + HCatInputFormat.setInput(job, dbName, tableName).setFilter(getPartitionFilterString()); + + // We will store all the new /changed properties in the job in the + // udf context, so the the HCatInputFormat.setInput method need not + //be called many times. + for (Entry keyValue : job.getConfiguration()) { + String oldValue = clone.getConfiguration().getRaw(keyValue.getKey()); + if ((oldValue == null) || (keyValue.getValue().equals(oldValue) == false)) { + udfProps.put(keyValue.getKey(), keyValue.getValue()); + } + } + udfProps.put(HCatConstants.HCAT_PIG_LOADER_LOCATION_SET, true); + + //Store credentials in a private hash map and not the udf context to + // make sure they are not public. + Credentials crd = new Credentials(); + crd.addAll(job.getCredentials()); + jobCredentials.put(INNER_SIGNATURE_PREFIX + "_" + signature, crd); + } + + // Need to also push projections by calling setOutputSchema on + // HCatInputFormat - we have to get the RequiredFields information + // from the UdfContext, translate it to an Schema and then pass it + // The reason we do this here is because setLocation() is called by + // Pig runtime at InputFormat.getSplits() and + // InputFormat.createRecordReader() time - we are not sure when + // HCatInputFormat needs to know about pruned projections - so doing it + // here will ensure we communicate to HCatInputFormat about pruned + // projections at getSplits() and createRecordReader() time + + if (requiredFieldsInfo != null) { + // convert to hcatschema and pass to HCatInputFormat + try { + outputSchema = phutil.getHCatSchema(requiredFieldsInfo.getFields(), signature, this.getClass()); + HCatInputFormat.setOutputSchema(job, outputSchema); + } catch (Exception e) { + throw new IOException(e); + } + } else { + // else - this means pig's optimizer 
never invoked the pushProjection + // method - so we need all fields and hence we should not call the + // setOutputSchema on HCatInputFormat + if (HCatUtil.checkJobContextIfRunningFromBackend(job)) { + try { + HCatSchema hcatTableSchema = (HCatSchema) udfProps.get(HCatConstants.HCAT_TABLE_SCHEMA); + outputSchema = hcatTableSchema; + HCatInputFormat.setOutputSchema(job, outputSchema); + } catch (Exception e) { + throw new IOException(e); + } + } + } + + } + + @Override + public String[] getPartitionKeys(String location, Job job) + throws IOException { + Table table = phutil.getTable(location, + hcatServerUri != null ? hcatServerUri : PigHCatUtil.getHCatServerUri(job), + PigHCatUtil.getHCatServerPrincipal(job)); + List tablePartitionKeys = table.getPartitionKeys(); + String[] partitionKeys = new String[tablePartitionKeys.size()]; + for (int i = 0; i < tablePartitionKeys.size(); i++) { + partitionKeys[i] = tablePartitionKeys.get(i).getName(); + } + return partitionKeys; + } + + @Override + public ResourceSchema getSchema(String location, Job job) throws IOException { + HCatContext.INSTANCE.setConf(job.getConfiguration()).getConf().get() + .setBoolean(HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION, true); + + Table table = phutil.getTable(location, + hcatServerUri != null ? 
hcatServerUri : PigHCatUtil.getHCatServerUri(job), + PigHCatUtil.getHCatServerPrincipal(job)); + HCatSchema hcatTableSchema = HCatUtil.getTableSchemaWithPtnCols(table); + try { + PigHCatUtil.validateHCatTableSchemaFollowsPigRules(hcatTableSchema); + } catch (IOException e) { + throw new PigException( + "Table schema incompatible for reading through HCatLoader :" + e.getMessage() + + ";[Table schema was " + hcatTableSchema.toString() + "]" + , PigHCatUtil.PIG_EXCEPTION_CODE, e); + } + storeInUDFContext(signature, HCatConstants.HCAT_TABLE_SCHEMA, hcatTableSchema); + outputSchema = hcatTableSchema; + return PigHCatUtil.getResourceSchema(hcatTableSchema); + } + + @Override + public void setPartitionFilter(Expression partitionFilter) throws IOException { + // convert the partition filter expression into a string expected by + // hcat and pass it in setLocation() + + partitionFilterString = getHCatComparisonString(partitionFilter); + + // store this in the udf context so we can get it later + storeInUDFContext(signature, + PARTITION_FILTER, partitionFilterString); + } + + /** + * Get statistics about the data to be loaded. Only input data size is implemented at this time. 
+   */
+  @Override
+  public ResourceStatistics getStatistics(String location, Job job) throws IOException {
+    try {
+      ResourceStatistics stats = new ResourceStatistics();
+      InputJobInfo inputJobInfo = (InputJobInfo) HCatUtil.deserialize(
+        job.getConfiguration().get(HCatConstants.HCAT_KEY_JOB_INFO));
+      stats.setmBytes(getSizeInBytes(inputJobInfo) / 1024 / 1024);
+      return stats;
+    } catch (Exception e) {
+      throw new IOException(e);
+    }
+  }
+
+  private String getPartitionFilterString() {
+    if (partitionFilterString == null) {
+      Properties props = UDFContext.getUDFContext().getUDFProperties(
+        this.getClass(), new String[]{signature});
+      partitionFilterString = props.getProperty(PARTITION_FILTER);
+    }
+    return partitionFilterString;
+  }
+
+  private String getHCatComparisonString(Expression expr) {
+    if (expr instanceof BinaryExpression) {
+      // call getHCatComparisonString on lhs and rhs, and join the
+      // results with OpType string
+
+      // we can just use OpType.toString() on all Expression types except
+      // Equal, NotEqual since Equal has '==' in toString() and
+      // we need '='
+      String opStr = null;
+      switch (expr.getOpType()) {
+      case OP_EQ:
+        opStr = " = ";
+        break;
+      default:
+        opStr = expr.getOpType().toString();
+      }
+      BinaryExpression be = (BinaryExpression) expr;
+      return "(" + getHCatComparisonString(be.getLhs()) +
+        opStr +
+        getHCatComparisonString(be.getRhs()) + ")";
+    } else {
+      // should be a constant or column
+      return expr.toString();
+    }
+  }
+
+}
diff --git hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatStorer.java hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatStorer.java
new file mode 100644
index 0000000..30ef8a9
--- /dev/null
+++ hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/HCatStorer.java
@@ -0,0 +1,167 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hive.hcatalog.pig;
+
+import java.io.IOException;
+import java.util.Enumeration;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Properties;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.shims.ShimLoader;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.OutputFormat;
+import org.apache.hadoop.security.Credentials;
+import org.apache.hive.hcatalog.common.HCatConstants;
+import org.apache.hive.hcatalog.common.HCatContext;
+import org.apache.hive.hcatalog.common.HCatException;
+import org.apache.hive.hcatalog.data.schema.HCatSchema;
+import org.apache.hive.hcatalog.mapreduce.HCatOutputFormat;
+import org.apache.hive.hcatalog.mapreduce.OutputJobInfo;
+import org.apache.pig.PigException;
+import org.apache.pig.ResourceSchema;
+import org.apache.pig.impl.logicalLayer.FrontendException;
+import org.apache.pig.impl.logicalLayer.schema.Schema;
+import org.apache.pig.impl.util.ObjectSerializer;
+import org.apache.pig.impl.util.UDFContext;
+
+/**
+ * HCatStorer.
+ *
+ */
+
+public class HCatStorer extends HCatBaseStorer {
+
+  // Signature for wrapped storer, see comments in LoadFuncBasedInputDriver.initialize
+  final public static String INNER_SIGNATURE = "hcatstorer.inner.signature";
+  final public static String INNER_SIGNATURE_PREFIX = "hcatstorer_inner_signature";
+  // A hash map which stores job credentials. The key is a signature passed by Pig, which is
+  // unique to the store func and out file name (table, in our case).
+  private static Map<String, Credentials> jobCredentials = new HashMap<String, Credentials>();
+
+
+  public HCatStorer(String partSpecs, String schema) throws Exception {
+    super(partSpecs, schema);
+  }
+
+  public HCatStorer(String partSpecs) throws Exception {
+    this(partSpecs, null);
+  }
+
+  public HCatStorer() throws Exception {
+    this(null, null);
+  }
+
+  @Override
+  public OutputFormat getOutputFormat() throws IOException {
+    return new HCatOutputFormat();
+  }
+
+  @Override
+  public void setStoreLocation(String location, Job job) throws IOException {
+    HCatContext.INSTANCE.setConf(job.getConfiguration()).getConf().get()
+      .setBoolean(HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION, false);
+
+    Configuration config = job.getConfiguration();
+    config.set(INNER_SIGNATURE, INNER_SIGNATURE_PREFIX + "_" + sign);
+    Properties udfProps = UDFContext.getUDFContext().getUDFProperties(
+      this.getClass(), new String[]{sign});
+    String[] userStr = location.split("\\.");
+
+    if (udfProps.containsKey(HCatConstants.HCAT_PIG_STORER_LOCATION_SET)) {
+      for (Enumeration<Object> emr = udfProps.keys(); emr.hasMoreElements(); ) {
+        PigHCatUtil.getConfigFromUDFProperties(udfProps, config, emr.nextElement().toString());
+      }
+      Credentials crd = jobCredentials.get(INNER_SIGNATURE_PREFIX + "_" + sign);
+      if (crd != null) {
+        job.getCredentials().addAll(crd);
+      }
+    } else {
+      Job clone = new Job(job.getConfiguration());
+      OutputJobInfo outputJobInfo;
+      if (userStr.length == 2) {
+        outputJobInfo = OutputJobInfo.create(userStr[0], userStr[1], partitions);
+      } else if (userStr.length == 1) {
+        outputJobInfo = OutputJobInfo.create(null, userStr[0], partitions);
+      } else {
+        throw new FrontendException("location " + location
+          + " is invalid. It must be of the form [db.]table",
+          PigHCatUtil.PIG_EXCEPTION_CODE);
+      }
+      Schema schema = (Schema) ObjectSerializer.deserialize(udfProps.getProperty(PIG_SCHEMA));
+      if (schema != null) {
+        pigSchema = schema;
+      }
+      if (pigSchema == null) {
+        throw new FrontendException(
+          "Schema for data cannot be determined.",
+          PigHCatUtil.PIG_EXCEPTION_CODE);
+      }
+      String externalLocation = (String) udfProps.getProperty(HCatConstants.HCAT_PIG_STORER_EXTERNAL_LOCATION);
+      if (externalLocation != null) {
+        outputJobInfo.setLocation(externalLocation);
+      }
+      try {
+        HCatOutputFormat.setOutput(job, outputJobInfo);
+      } catch (HCatException he) {
+        // pass the message to the user - essentially something about
+        // the table
+        // information passed to HCatOutputFormat was not right
+        throw new PigException(he.getMessage(),
+          PigHCatUtil.PIG_EXCEPTION_CODE, he);
+      }
+      HCatSchema hcatTblSchema = HCatOutputFormat.getTableSchema(job);
+      try {
+        doSchemaValidations(pigSchema, hcatTblSchema);
+      } catch (HCatException he) {
+        throw new FrontendException(he.getMessage(), PigHCatUtil.PIG_EXCEPTION_CODE, he);
+      }
+      computedSchema = convertPigSchemaToHCatSchema(pigSchema, hcatTblSchema);
+      HCatOutputFormat.setSchema(job, computedSchema);
+      udfProps.setProperty(COMPUTED_OUTPUT_SCHEMA, ObjectSerializer.serialize(computedSchema));
+
+      // We will store all the new /changed properties in the job in the
+      // udf context, so that the HCatOutputFormat.setOutput and setSchema
+      // methods need not be called many times.
+      for (Entry<String, String> keyValue : job.getConfiguration()) {
+        String oldValue = clone.getConfiguration().getRaw(keyValue.getKey());
+        if ((oldValue == null) || (keyValue.getValue().equals(oldValue) == false)) {
+          udfProps.put(keyValue.getKey(), keyValue.getValue());
+        }
+      }
+      // Store credentials in a private hash map and not the udf context to
+      // make sure they are not public.
+      jobCredentials.put(INNER_SIGNATURE_PREFIX + "_" + sign, job.getCredentials());
+      udfProps.put(HCatConstants.HCAT_PIG_STORER_LOCATION_SET, true);
+    }
+  }
+
+  @Override
+  public void storeSchema(ResourceSchema schema, String arg1, Job job) throws IOException {
+    ShimLoader.getHadoopShims().getHCatShim().commitJob(getOutputFormat(), job);
+  }
+
+  @Override
+  public void cleanupOnFailure(String location, Job job) throws IOException {
+    ShimLoader.getHadoopShims().getHCatShim().abortJob(getOutputFormat(), job);
+  }
+}
diff --git hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/PigHCatUtil.java hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/PigHCatUtil.java
new file mode 100644
index 0000000..9c849cc
--- /dev/null
+++ hcatalog/hcatalog-pig-adapter/src/main/java/org/apache/hive/hcatalog/pig/PigHCatUtil.java
@@ -0,0 +1,492 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.
See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hive.hcatalog.pig;
+
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Properties;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
+import org.apache.hadoop.hive.metastore.MetaStoreUtils;
+import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hive.hcatalog.common.HCatConstants;
+import org.apache.hive.hcatalog.common.HCatException;
+import org.apache.hive.hcatalog.common.HCatUtil;
+import org.apache.hive.hcatalog.data.HCatRecord;
+import org.apache.hive.hcatalog.data.Pair;
+import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
+import org.apache.hive.hcatalog.data.schema.HCatFieldSchema.Type;
+import org.apache.hive.hcatalog.data.schema.HCatSchema;
+import org.apache.pig.LoadPushDown.RequiredField;
+import org.apache.pig.PigException;
+import org.apache.pig.ResourceSchema;
+import org.apache.pig.ResourceSchema.ResourceFieldSchema;
+import org.apache.pig.data.DataBag;
+import org.apache.pig.data.DataByteArray;
+import org.apache.pig.data.DataType;
+import org.apache.pig.data.DefaultDataBag;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.data.TupleFactory;
+import org.apache.pig.impl.logicalLayer.schema.Schema;
+import org.apache.pig.impl.util.UDFContext;
+import org.apache.pig.impl.util.Utils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+class PigHCatUtil {
+
+  private static final Logger LOG = LoggerFactory.getLogger(PigHCatUtil.class);
+
+  static final int PIG_EXCEPTION_CODE = 1115; // http://wiki.apache.org/pig/PigErrorHandlingFunctionalSpecification#Error_codes
+  private static final String DEFAULT_DB = MetaStoreUtils.DEFAULT_DATABASE_NAME;
+
+  private final Map<Pair<String, String>, Table> hcatTableCache =
+    new HashMap<Pair<String, String>, Table>();
+
+  private static final TupleFactory tupFac = TupleFactory.getInstance();
+
+  private static boolean pigHasBooleanSupport = false;
+
+  /**
+   * Determine if the current Pig version supports boolean columns. This works around a
+   * dependency conflict preventing HCatalog from requiring a version of Pig with boolean
+   * field support and should be removed once HCATALOG-466 has been resolved.
+   */
+  static {
+    // DETAILS:
+    //
+    // PIG-1429 added support for boolean fields, which shipped in 0.10.0;
+    // this version of Pig depends on antlr 3.4.
+    //
+    // HCatalog depends heavily on Hive, which at this time uses antlr 3.0.1.
+    //
+    // antlr 3.0.1 and 3.4 are incompatible, so Pig 0.10.0 and Hive cannot be depended on in the
+    // same project. Pig 0.8.0 did not use antlr for its parser and can coexist with Hive,
+    // so that Pig version is depended on by HCatalog at this time.
+    try {
+      Schema schema = Utils.getSchemaFromString("myBooleanField: boolean");
+      pigHasBooleanSupport = (schema.getField("myBooleanField").type == DataType.BOOLEAN);
+    } catch (Throwable e) {
+      // pass
+    }
+
+    if (!pigHasBooleanSupport) {
+      LOG.info("This version of Pig does not support boolean fields. To enable "
+        + "boolean-to-integer conversion, set the "
+        + HCatConstants.HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER
+        + "=true configuration parameter.");
+    }
+  }
+
+  static public boolean pigHasBooleanSupport(){
+    return pigHasBooleanSupport;
+  }
+
+  static public Pair<String, String> getDBTableNames(String location) throws IOException {
+    // the location string will be of the form:
+    // <database name>.<table name> - parse it and
+    // communicate the information to HCatInputFormat
+
+    try {
+      return HCatUtil.getDbAndTableName(location);
+    } catch (IOException e) {
+      String locationErrMsg = "The input location in load statement " +
+        "should be of the form " +
+        "<databasename>.<table name> or <table name>. Got " + location;
+      throw new PigException(locationErrMsg, PIG_EXCEPTION_CODE, e); // keep the parse failure as the cause
+    }
+  }
+
+  static public String getHCatServerUri(Job job) {
+
+    return job.getConfiguration().get(HiveConf.ConfVars.METASTOREURIS.varname);
+  }
+
+  static public String getHCatServerPrincipal(Job job) {
+
+    return job.getConfiguration().get(HCatConstants.HCAT_METASTORE_PRINCIPAL);
+  }
+
+  private static HiveMetaStoreClient getHiveMetaClient(String serverUri,
+                             String serverKerberosPrincipal, Class<?> clazz) throws Exception {
+    HiveConf hiveConf = new HiveConf(clazz);
+
+    if (serverUri != null) {
+      hiveConf.set("hive.metastore.local", "false");
+      hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, serverUri.trim());
+    }
+
+    if (serverKerberosPrincipal != null) {
+      hiveConf.setBoolVar(HiveConf.ConfVars.METASTORE_USE_THRIFT_SASL, true);
+      hiveConf.setVar(HiveConf.ConfVars.METASTORE_KERBEROS_PRINCIPAL, serverKerberosPrincipal);
+    }
+
+    try {
+      return HCatUtil.getHiveClient(hiveConf);
+    } catch (Exception e) {
+      throw new Exception("Could not instantiate a HiveMetaStoreClient connecting to server uri:[" + serverUri + "]", e);
+    }
+  }
+
+
+  HCatSchema getHCatSchema(List<RequiredField> fields, String signature, Class<?> classForUDFCLookup) throws IOException {
+    if (fields == null) {
+      return null;
+    }
+
+    Properties props = UDFContext.getUDFContext().getUDFProperties(
+      classForUDFCLookup, new String[]{signature});
+    HCatSchema hcatTableSchema = (HCatSchema) props.get(HCatConstants.HCAT_TABLE_SCHEMA);
+
+    ArrayList<HCatFieldSchema> fcols = new ArrayList<HCatFieldSchema>();
+    for (RequiredField rf : fields) {
+      fcols.add(hcatTableSchema.getFields().get(rf.getIndex()));
+    }
+    return new HCatSchema(fcols);
+  }
+
+  public Table getTable(String location, String hcatServerUri, String hcatServerPrincipal) throws IOException {
+    Pair<String, String> loc_server = new Pair<String, String>(location, hcatServerUri);
+    Table hcatTable = hcatTableCache.get(loc_server);
+    if (hcatTable != null) {
+      return hcatTable;
+    }
+
+    Pair<String, String> dbTablePair = PigHCatUtil.getDBTableNames(location);
+    String dbName = dbTablePair.first;
+    String tableName = dbTablePair.second;
+    Table table = null;
+    HiveMetaStoreClient client = null;
+    try {
+      client = getHiveMetaClient(hcatServerUri, hcatServerPrincipal, PigHCatUtil.class);
+      table = HCatUtil.getTable(client, dbName, tableName);
+    } catch (NoSuchObjectException nsoe) {
+      throw new PigException("Table not found : " + nsoe.getMessage(), PIG_EXCEPTION_CODE, nsoe); // prettier error messages to frontend
+    } catch (Exception e) {
+      throw new IOException(e);
+    } finally {
+      HCatUtil.closeHiveClientQuietly(client);
+    }
+    hcatTableCache.put(loc_server, table);
+    return table;
+  }
+
+  public static ResourceSchema getResourceSchema(HCatSchema hcatSchema) throws IOException {
+
+    List<ResourceFieldSchema> rfSchemaList = new ArrayList<ResourceFieldSchema>();
+    for (HCatFieldSchema hfs : hcatSchema.getFields()) {
+      ResourceFieldSchema rfSchema;
+      rfSchema = getResourceSchemaFromFieldSchema(hfs);
+      rfSchemaList.add(rfSchema);
+    }
+    ResourceSchema rSchema = new ResourceSchema();
+    rSchema.setFields(rfSchemaList.toArray(new ResourceFieldSchema[0]));
+    return rSchema;
+
+  }
+
+  private static ResourceFieldSchema getResourceSchemaFromFieldSchema(HCatFieldSchema hfs)
+    throws IOException {
+    ResourceFieldSchema rfSchema;
+    // if we are dealing with a bag or tuple column - need to worry about subschema
+    if (hfs.getType() == Type.STRUCT) {
+      rfSchema = new ResourceFieldSchema()
+        .setName(hfs.getName())
+        .setDescription(hfs.getComment())
+        .setType(getPigType(hfs))
+        .setSchema(getTupleSubSchema(hfs));
+    } else if (hfs.getType() == Type.ARRAY) {
+      rfSchema = new ResourceFieldSchema()
+        .setName(hfs.getName())
+        .setDescription(hfs.getComment())
+        .setType(getPigType(hfs))
+        .setSchema(getBagSubSchema(hfs));
+    } else {
+      rfSchema = new ResourceFieldSchema()
+        .setName(hfs.getName())
+        .setDescription(hfs.getComment())
+        .setType(getPigType(hfs))
+        .setSchema(null); // no munging inner-schemas
+    }
+    return rfSchema;
+  }
+
+  protected static ResourceSchema getBagSubSchema(HCatFieldSchema hfs) throws IOException {
+    // there are two cases - array<Type> and array<struct<...>>
+    // in either case the element type of the array is represented in a
+    // tuple field schema in the bag's field schema - the second case (struct)
+    // more naturally translates to the tuple - in the first case (array<Type>)
+    // we simulate the tuple by putting the single field in a tuple
+
+    Properties props = UDFContext.getUDFContext().getClientSystemProps();
+    String innerTupleName = HCatConstants.HCAT_PIG_INNER_TUPLE_NAME_DEFAULT;
+    if (props != null && props.containsKey(HCatConstants.HCAT_PIG_INNER_TUPLE_NAME)) {
+      innerTupleName = props.getProperty(HCatConstants.HCAT_PIG_INNER_TUPLE_NAME)
+        .replace("FIELDNAME", hfs.getName()); // literal substitution: '$' or '\' in the name is not a regex group reference
+    }
+    String innerFieldName = HCatConstants.HCAT_PIG_INNER_FIELD_NAME_DEFAULT;
+    if (props != null && props.containsKey(HCatConstants.HCAT_PIG_INNER_FIELD_NAME)) {
+      innerFieldName = props.getProperty(HCatConstants.HCAT_PIG_INNER_FIELD_NAME)
+        .replace("FIELDNAME", hfs.getName()); // literal substitution, see above
+    }
+
+    ResourceFieldSchema[] bagSubFieldSchemas = new ResourceFieldSchema[1];
+    bagSubFieldSchemas[0] = new ResourceFieldSchema().setName(innerTupleName)
+      .setDescription("The tuple in the bag")
+      .setType(DataType.TUPLE);
+    HCatFieldSchema arrayElementFieldSchema = hfs.getArrayElementSchema().get(0);
+    if (arrayElementFieldSchema.getType() == Type.STRUCT) {
+      bagSubFieldSchemas[0].setSchema(getTupleSubSchema(arrayElementFieldSchema));
+    } else if (arrayElementFieldSchema.getType() == Type.ARRAY) {
+      ResourceSchema s = new ResourceSchema();
+      List<ResourceFieldSchema> lrfs = Arrays.asList(getResourceSchemaFromFieldSchema(arrayElementFieldSchema));
+      s.setFields(lrfs.toArray(new ResourceFieldSchema[0]));
+      bagSubFieldSchemas[0].setSchema(s);
+    } else {
+      ResourceFieldSchema[] innerTupleFieldSchemas = new ResourceFieldSchema[1];
+      innerTupleFieldSchemas[0] = new ResourceFieldSchema().setName(innerFieldName)
+        .setDescription("The inner field in the tuple in the bag")
+        .setType(getPigType(arrayElementFieldSchema))
+        .setSchema(null); // the element type is not a tuple - so no subschema
+      bagSubFieldSchemas[0].setSchema(new ResourceSchema().setFields(innerTupleFieldSchemas));
+    }
+    ResourceSchema s = new ResourceSchema().setFields(bagSubFieldSchemas);
+    return s;
+
+  }
+
+  private static ResourceSchema getTupleSubSchema(HCatFieldSchema hfs) throws IOException {
+    // for each struct subfield, create equivalent ResourceFieldSchema
+    ResourceSchema s = new ResourceSchema();
+    List<ResourceFieldSchema> lrfs = new ArrayList<ResourceFieldSchema>();
+    for (HCatFieldSchema subField : hfs.getStructSubSchema().getFields()) {
+      lrfs.add(getResourceSchemaFromFieldSchema(subField));
+    }
+    s.setFields(lrfs.toArray(new ResourceFieldSchema[0]));
+    return s;
+  }
+
+  /**
+   * @param hfs the field schema of the column
+   * @return corresponding pig type
+   * @throws IOException
+   */
+  static public byte getPigType(HCatFieldSchema hfs) throws IOException {
+    return getPigType(hfs.getType());
+  }
+
+  static public byte getPigType(Type type) throws IOException {
+    if (type == Type.STRING) {
+      return DataType.CHARARRAY;
+    }
+
+    if ((type == Type.INT) || (type == Type.SMALLINT) || (type == Type.TINYINT)) {
+      return DataType.INTEGER;
+    }
+
+    if (type == Type.ARRAY) {
+      return DataType.BAG;
+    }
+
+    if (type == Type.STRUCT) {
+      return DataType.TUPLE;
+    }
+
+    if (type == Type.MAP) {
+      return DataType.MAP;
+    }
+
+    if (type == Type.BIGINT) {
+      return DataType.LONG;
+    }
+
+    if (type == Type.FLOAT) {
+      return DataType.FLOAT;
+    }
+
+    if (type == Type.DOUBLE) {
+      return DataType.DOUBLE;
+    }
+
+    if (type == Type.BINARY) {
+      return DataType.BYTEARRAY;
+    }
+
+    if (type == Type.BOOLEAN && pigHasBooleanSupport) {
+      return DataType.BOOLEAN;
+    }
+
+    throw new PigException("HCatalog column type '" + type.toString()
+      + "' is not supported in Pig as a column type", PIG_EXCEPTION_CODE);
+  }
+
+  public static Tuple transformToTuple(HCatRecord hr, HCatSchema hs) throws Exception {
+    if (hr == null) {
+      return null;
+    }
+    return transformToTuple(hr.getAll(), hs);
+  }
+
+  @SuppressWarnings("unchecked")
+  public static Object extractPigObject(Object o, HCatFieldSchema hfs) throws Exception {
+    Object result;
+    Type itemType = hfs.getType();
+    switch (itemType) {
+    case BINARY:
+      result = (o == null) ? null : new DataByteArray((byte[]) o);
+      break;
+    case STRUCT:
+      result = transformToTuple((List<Object>) o, hfs);
+      break;
+    case ARRAY:
+      result = transformToBag((List<? extends Object>) o, hfs);
+      break;
+    case MAP:
+      result = transformToPigMap((Map<Object, Object>) o, hfs);
+      break;
+    default:
+      result = o;
+      break;
+    }
+    return result;
+  }
+
+  private static Tuple transformToTuple(List<? extends Object> objList, HCatFieldSchema hfs) throws Exception {
+    try {
+      return transformToTuple(objList, hfs.getStructSubSchema());
+    } catch (Exception e) {
+      if (hfs.getType() != Type.STRUCT) {
+        throw new Exception("Expected Struct type, got " + hfs.getType(), e);
+      } else {
+        throw e;
+      }
+    }
+  }
+
+  private static Tuple transformToTuple(List<? extends Object> objList, HCatSchema hs) throws Exception {
+    if (objList == null) {
+      return null;
+    }
+    Tuple t = tupFac.newTuple(objList.size());
+    List<HCatFieldSchema> subFields = hs.getFields();
+    for (int i = 0; i < subFields.size(); i++) {
+      t.set(i, extractPigObject(objList.get(i), subFields.get(i)));
+    }
+    return t;
+  }
+
+  private static Map<String, Object> transformToPigMap(Map<Object, Object> map, HCatFieldSchema hfs) throws Exception {
+    if (map == null) {
+      return null;
+    }
+
+    Map<String, Object> result = new HashMap<String, Object>();
+    for (Entry<Object, Object> entry : map.entrySet()) {
+      // since map key for Pig has to be Strings
+      result.put(entry.getKey().toString(), extractPigObject(entry.getValue(), hfs.getMapValueSchema().get(0)));
+    }
+    return result;
+  }
+
+  @SuppressWarnings("unchecked")
+  private static DataBag transformToBag(List<? extends Object> list, HCatFieldSchema hfs) throws Exception {
+    if (list == null) {
+      return null;
+    }
+
+    HCatFieldSchema elementSubFieldSchema = hfs.getArrayElementSchema().getFields().get(0);
+    DataBag db = new DefaultDataBag();
+    for (Object o : list) {
+      Tuple tuple;
+      if (elementSubFieldSchema.getType() == Type.STRUCT) {
+        tuple = transformToTuple((List<Object>) o, elementSubFieldSchema);
+      } else {
+        // bags always contain tuples
+        tuple = tupFac.newTuple(extractPigObject(o, elementSubFieldSchema));
+      }
+      db.add(tuple);
+    }
+    return db;
+  }
+
+
+  private static void validateHCatSchemaFollowsPigRules(HCatSchema tblSchema) throws PigException {
+    for (HCatFieldSchema hcatField : tblSchema.getFields()) {
+      validateHcatFieldFollowsPigRules(hcatField);
+    }
+  }
+
+  private static void validateHcatFieldFollowsPigRules(HCatFieldSchema hcatField) throws PigException {
+    try {
+      Type hType = hcatField.getType();
+      switch (hType) {
+      case BOOLEAN:
+        if (!pigHasBooleanSupport) {
+          throw new PigException("Incompatible type found in HCat table schema: "
+            + hcatField, PigHCatUtil.PIG_EXCEPTION_CODE);
+        }
+        break;
+      case ARRAY:
+        validateHCatSchemaFollowsPigRules(hcatField.getArrayElementSchema());
+        break;
+      case STRUCT:
+        validateHCatSchemaFollowsPigRules(hcatField.getStructSubSchema());
+        break;
+      case MAP:
+        // key is only string
+        if (hcatField.getMapKeyType() != Type.STRING) {
+          LOG.info("Converting non-String key of map " + hcatField.getName() + " from "
+            + hcatField.getMapKeyType() + " to String.");
+        }
+        validateHCatSchemaFollowsPigRules(hcatField.getMapValueSchema());
+        break;
+      }
+    } catch (HCatException e) {
+      throw new PigException("Incompatible type found in hcat table schema: " + hcatField, PigHCatUtil.PIG_EXCEPTION_CODE, e);
+    }
+  }
+
+
+  public static void validateHCatTableSchemaFollowsPigRules(HCatSchema hcatTableSchema) throws IOException {
+    validateHCatSchemaFollowsPigRules(hcatTableSchema);
+  }
+
+  public static void getConfigFromUDFProperties(Properties p, Configuration config, String propName) {
+    if (p.getProperty(propName) != null) {
+      config.set(propName, p.getProperty(propName));
+    }
+  }
+
+  public static void saveConfigIntoUDFProperties(Properties p, Configuration config, String propName) {
+    if (config.get(propName) != null) {
+      p.setProperty(propName, config.get(propName));
+    }
+  }
+
+}
diff --git hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/HCatStorerWrapper.java hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/HCatStorerWrapper.java
deleted file mode 100644
index 1e0953e..0000000
--- hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/HCatStorerWrapper.java
+++ /dev/null
@@ -1,72 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.hcatalog.pig;
-
-import java.io.IOException;
-import java.util.Properties;
-
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hcatalog.common.HCatConstants;
-import org.apache.pig.impl.util.UDFContext;
-
-/**
- * This class is used to test the HCAT_PIG_STORER_EXTERNAL_LOCATION property used in HCatStorer.
- * When this property is set, HCatStorer writes the output to the location it specifies. Since
- * the property can only be set in the UDFContext, we need this simpler wrapper to do three things:
- * <ol>
- * <li> save the external dir specified in the Pig script </li>
- * <li> set the same UDFContext signature as HCatStorer </li>
- * <li> before {@link HCatStorer#setStoreLocation(String, Job)}, set the external dir in the UDFContext.</li>
- * </ol>
- */ -public class HCatStorerWrapper extends HCatStorer { - - private String sign; - private String externalDir; - - public HCatStorerWrapper(String partSpecs, String schema, String externalDir) throws Exception { - super(partSpecs, schema); - this.externalDir = externalDir; - } - - public HCatStorerWrapper(String partSpecs, String externalDir) throws Exception { - super(partSpecs); - this.externalDir = externalDir; - } - - public HCatStorerWrapper(String externalDir) throws Exception{ - super(); - this.externalDir = externalDir; - } - - @Override - public void setStoreLocation(String location, Job job) throws IOException { - Properties udfProps = UDFContext.getUDFContext().getUDFProperties( - this.getClass(), new String[] { sign }); - udfProps.setProperty(HCatConstants.HCAT_PIG_STORER_EXTERNAL_LOCATION, externalDir); - super.setStoreLocation(location, job); - } - - @Override - public void setStoreFuncUDFContextSignature(String signature) { - sign = signature; - super.setStoreFuncUDFContextSignature(signature); - } -} diff --git hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/MockLoader.java hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/MockLoader.java deleted file mode 100644 index 5819788..0000000 --- hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/MockLoader.java +++ /dev/null @@ -1,180 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.pig; - -import java.io.DataInput; -import java.io.DataOutput; -import java.io.IOException; -import java.util.Arrays; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; - -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.mapreduce.InputFormat; -import org.apache.hadoop.mapreduce.InputSplit; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.JobContext; -import org.apache.hadoop.mapreduce.RecordReader; -import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.pig.LoadFunc; -import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigSplit; -import org.apache.pig.data.Tuple; - -public class MockLoader extends LoadFunc { - private static final class MockRecordReader extends RecordReader { - @Override - public void close() throws IOException { - } - - @Override - public Object getCurrentKey() throws IOException, InterruptedException { - return "mockKey"; - } - - @Override - public Object getCurrentValue() throws IOException, InterruptedException { - return "mockValue"; - } - - @Override - public float getProgress() throws IOException, InterruptedException { - return 0.5f; - } - - @Override - public void initialize(InputSplit split, TaskAttemptContext arg1) throws IOException, - InterruptedException { - } - - @Override - public boolean nextKeyValue() throws IOException, InterruptedException { - return true; - } - } - - private static final class MockInputSplit 
extends InputSplit implements Writable { - private String location; - - public MockInputSplit() { - } - - public MockInputSplit(String location) { - this.location = location; - } - - @Override - public String[] getLocations() throws IOException, InterruptedException { - return new String[]{location}; - } - - @Override - public long getLength() throws IOException, InterruptedException { - return 10000000; - } - - @Override - public boolean equals(Object arg0) { - return arg0 == this; - } - - @Override - public int hashCode() { - return location.hashCode(); - } - - @Override - public void readFields(DataInput arg0) throws IOException { - location = arg0.readUTF(); - } - - @Override - public void write(DataOutput arg0) throws IOException { - arg0.writeUTF(location); - } - } - - private static final class MockInputFormat extends InputFormat { - - private final String location; - - public MockInputFormat(String location) { - this.location = location; - } - - @Override - public RecordReader createRecordReader(InputSplit arg0, TaskAttemptContext arg1) - throws IOException, InterruptedException { - return new MockRecordReader(); - } - - @Override - public List getSplits(JobContext arg0) throws IOException, InterruptedException { - return Arrays.asList(new MockInputSplit(location)); - } - } - - private static final Map> locationToData = new HashMap>(); - - public static void setData(String location, Iterable data) { - locationToData.put(location, data); - } - - private String location; - - private Iterator data; - - @Override - public String relativeToAbsolutePath(String location, Path curDir) throws IOException { - return location; - } - - @Override - public void setLocation(String location, Job job) throws IOException { - this.location = location; - if (location == null) { - throw new IOException("null location passed to MockLoader"); - } - this.data = locationToData.get(location).iterator(); - if (this.data == null) { - throw new IOException("No data configured for 
location: " + location); - } - } - - @Override - public Tuple getNext() throws IOException { - if (data == null) { - throw new IOException("data was not correctly initialized in MockLoader"); - } - return data.hasNext() ? data.next() : null; - } - - @Override - public InputFormat getInputFormat() throws IOException { - return new MockInputFormat(location); - } - - @Override - public void prepareToRead(RecordReader arg0, PigSplit arg1) throws IOException { - } - -} diff --git hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/MyPigStorage.java hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/MyPigStorage.java deleted file mode 100644 index 9de6be3..0000000 --- hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/MyPigStorage.java +++ /dev/null @@ -1,40 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.hcatalog.pig; - -import java.io.IOException; - -import org.apache.pig.builtin.PigStorage; -import org.apache.pig.data.Tuple; - -public class MyPigStorage extends PigStorage { - - String arg2; - - public MyPigStorage(String arg1, String arg2) throws IOException { - super(arg1); - this.arg2 = arg2; - } - - @Override - public void putNext(Tuple t) throws IOException { - t.append(arg2); - super.putNext(t); - } -} diff --git hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestE2EScenarios.java hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestE2EScenarios.java deleted file mode 100644 index 9b8e429..0000000 --- hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestE2EScenarios.java +++ /dev/null @@ -1,230 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.hcatalog.pig; - -import java.io.File; -import java.io.IOException; -import java.util.HashMap; -import java.util.Iterator; - -import junit.framework.TestCase; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileUtil; -import org.apache.hadoop.hive.cli.CliSessionState; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.CommandNeedRetryException; -import org.apache.hadoop.hive.ql.Driver; -import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.mapreduce.InputSplit; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.OutputCommitter; -import org.apache.hadoop.mapreduce.RecordReader; -import org.apache.hadoop.mapreduce.RecordWriter; -import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.hadoop.mapreduce.TaskAttemptID; -import org.apache.hcatalog.HcatTestUtils; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.common.HCatContext; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.apache.hcatalog.mapreduce.HCatInputFormat; -import org.apache.hcatalog.mapreduce.HCatOutputFormat; -import org.apache.hcatalog.mapreduce.OutputJobInfo; -import org.apache.hcatalog.mapreduce.HCatMapRedUtil; -import org.apache.pig.ExecType; -import org.apache.pig.PigServer; -import org.apache.pig.data.Tuple; - -public class TestE2EScenarios extends TestCase { - - private static final String TEST_DATA_DIR = System.getProperty("user.dir") + - "/build/test/data/" + TestHCatLoader.class.getCanonicalName(); - private static final String TEST_WAREHOUSE_DIR = TEST_DATA_DIR + "/warehouse"; - - private static final String TEXTFILE_LOCN = TEST_DATA_DIR + "/textfile"; - - private static Driver driver; - - protected String storageFormat() { - return "orc"; - } - - @Override - protected void setUp() throws Exception { - - File 
f = new File(TEST_WAREHOUSE_DIR); - if (f.exists()) { - FileUtil.fullyDelete(f); - } - new File(TEST_WAREHOUSE_DIR).mkdirs(); - - HiveConf hiveConf = new HiveConf(this.getClass()); - hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); - hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); - hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); - hiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, TEST_WAREHOUSE_DIR); - driver = new Driver(hiveConf); - SessionState.start(new CliSessionState(hiveConf)); - - } - - @Override - protected void tearDown() throws Exception { - dropTable("inpy"); - dropTable("rc5318"); - dropTable("orc5318"); - } - - private void dropTable(String tablename) throws IOException, CommandNeedRetryException { - driver.run("drop table " + tablename); - } - - private void createTable(String tablename, String schema, String partitionedBy, String storageFormat) throws IOException, CommandNeedRetryException { - String createTable; - createTable = "create table " + tablename + "(" + schema + ") "; - if ((partitionedBy != null) && (!partitionedBy.trim().isEmpty())) { - createTable = createTable + "partitioned by (" + partitionedBy + ") "; - } - if (storageFormat != null){ - createTable = createTable + "stored as " +storageFormat; - } - driverRun(createTable); - } - - private void driverRun(String cmd) throws IOException, CommandNeedRetryException { - int retCode = driver.run(cmd).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to run [" - + cmd + "], return code from hive driver : [" + retCode + "]"); - } - } - - private void pigDump(String tableName) throws IOException { - PigServer server = new PigServer(ExecType.LOCAL); - - System.err.println("==="); - System.err.println(tableName+":"); - server.registerQuery("X = load '" + tableName - + "' using org.apache.hcatalog.pig.HCatLoader();"); - Iterator XIter = server.openIterator("X"); - while (XIter.hasNext()) { - Tuple t = XIter.next(); - 
for (Object o : t.getAll()){ - System.err.print( - "\t(" + o.getClass().getName() + ":" - + o.toString() + ")" - ); - } - System.err.println(""); - } - System.err.println("==="); - } - - - private void copyTable(String in, String out) throws IOException, InterruptedException { - Job ijob = new Job(); - Job ojob = new Job(); - HCatInputFormat inpy = new HCatInputFormat(); - inpy.setInput(ijob , null, in); - HCatOutputFormat oupy = new HCatOutputFormat(); - oupy.setOutput(ojob, - OutputJobInfo.create(null, out, new HashMap() - )); - - // Test HCatContext - - System.err.println("HCatContext INSTANCE is present : " +HCatContext.INSTANCE.getConf().isPresent()); - if (HCatContext.INSTANCE.getConf().isPresent()){ - System.err.println("HCatContext tinyint->int promotion says " + - HCatContext.INSTANCE.getConf().get().getBoolean( - HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION, - HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION_DEFAULT)); - } - - HCatSchema tableSchema = inpy.getTableSchema(ijob.getConfiguration()); - System.err.println("Copying from ["+in+"] to ["+out+"] with schema : "+ tableSchema.toString()); - oupy.setSchema(ojob, tableSchema); - oupy.checkOutputSpecs(ojob); - OutputCommitter oc = oupy.getOutputCommitter(createTaskAttemptContext(ojob.getConfiguration())); - oc.setupJob(ojob); - - for (InputSplit split : inpy.getSplits(ijob)){ - - TaskAttemptContext rtaskContext = createTaskAttemptContext(ijob.getConfiguration()); - TaskAttemptContext wtaskContext = createTaskAttemptContext(ojob.getConfiguration()); - - RecordReader rr = inpy.createRecordReader(split, rtaskContext); - rr.initialize(split, rtaskContext); - - OutputCommitter taskOc = oupy.getOutputCommitter(wtaskContext); - taskOc.setupTask(wtaskContext); - RecordWriter, HCatRecord> rw = oupy.getRecordWriter(wtaskContext); - - while(rr.nextKeyValue()){ - rw.write(rr.getCurrentKey(), rr.getCurrentValue()); - } - rw.close(wtaskContext); - taskOc.commitTask(wtaskContext); - rr.close(); - } - - 
oc.commitJob(ojob); - } - - private TaskAttemptContext createTaskAttemptContext(Configuration tconf) { - Configuration conf = (tconf == null) ? (new Configuration()) : tconf; - TaskAttemptID taskId = new TaskAttemptID(); - conf.setInt("mapred.task.partition", taskId.getId()); - conf.set("mapred.task.id", "attempt__0000_r_000000_" + taskId.getId()); - TaskAttemptContext rtaskContext = HCatMapRedUtil.createTaskAttemptContext(conf , taskId); - return rtaskContext; - } - - - public void testReadOrcAndRCFromPig() throws Exception { - String tableSchema = "ti tinyint, si smallint,i int, bi bigint, f float, d double, b boolean"; - - HcatTestUtils.createTestDataFile(TEXTFILE_LOCN, - new String[]{ - "-3\0019001\00186400\0014294967297\00134.532\0012184239842983489.1231231234\001true" - ,"0\0010\0010\0010\0010\0010\001false" - } - ); - - // write this out to a file, and import it into hive - createTable("inpy",tableSchema,null,"textfile"); - createTable("rc5318",tableSchema,null,"rcfile"); - createTable("orc5318",tableSchema,null,"orc"); - driverRun("LOAD DATA LOCAL INPATH '"+TEXTFILE_LOCN+"' OVERWRITE INTO TABLE inpy"); - - // write it out from hive to an rcfile table, and to an orc table -// driverRun("insert overwrite table rc5318 select * from inpy"); - copyTable("inpy","rc5318"); -// driverRun("insert overwrite table orc5318 select * from inpy"); - copyTable("inpy","orc5318"); - - pigDump("inpy"); - pigDump("rc5318"); - pigDump("orc5318"); - - } - -} diff --git hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatEximLoader.java.broken hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatEximLoader.java.broken deleted file mode 100644 index 238edb2..0000000 --- hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatEximLoader.java.broken +++ /dev/null @@ -1,352 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.pig; - -import java.io.IOException; -import java.util.Iterator; -import java.util.Map; -import java.util.Properties; -import java.util.TreeMap; - -import junit.framework.TestCase; - -import org.apache.hcatalog.MiniCluster; -import org.apache.pig.ExecType; -import org.apache.pig.PigServer; -import org.apache.pig.backend.executionengine.ExecException; -import org.apache.pig.data.Tuple; -import org.apache.pig.impl.util.UDFContext; - -/** - * - * TestHCatEximLoader. 
Assumes Exim storer is working well - * - */ -public class TestHCatEximLoader extends TestCase { - - private static final String NONPART_TABLE = "junit_unparted"; - private static final String PARTITIONED_TABLE = "junit_parted"; - private static MiniCluster cluster = MiniCluster.buildCluster(); - - private static final String dataLocation = "/tmp/data"; - private static String fqdataLocation; - private static final String exportLocation = "/tmp/export"; - private static String fqexportLocation; - - private static Properties props; - - private void cleanup() throws IOException { - MiniCluster.deleteFile(cluster, dataLocation); - MiniCluster.deleteFile(cluster, exportLocation); - } - - @Override - protected void setUp() throws Exception { - props = new Properties(); - props.setProperty("fs.default.name", cluster.getProperties().getProperty("fs.default.name")); - System.out.println("Filesystem class : " + cluster.getFileSystem().getClass().getName() - + ", fs.default.name : " + props.getProperty("fs.default.name")); - fqdataLocation = cluster.getProperties().getProperty("fs.default.name") + dataLocation; - fqexportLocation = cluster.getProperties().getProperty("fs.default.name") + exportLocation; - System.out.println("FQ Data Location :" + fqdataLocation); - System.out.println("FQ Export Location :" + fqexportLocation); - cleanup(); - } - - @Override - protected void tearDown() throws Exception { - cleanup(); - } - - private void populateDataFile() throws IOException { - MiniCluster.deleteFile(cluster, dataLocation); - String[] input = new String[] { - "237,Krishna,01/01/1990,M,IN,TN", - "238,Kalpana,01/01/2000,F,IN,KA", - "239,Satya,01/01/2001,M,US,TN", - "240,Kavya,01/01/2002,F,US,KA" - }; - MiniCluster.createInputFile(cluster, dataLocation, input); - } - - private static class EmpDetail { - String name; - String dob; - String mf; - String country; - String state; - } - - private void assertEmpDetail(Tuple t, Map eds) throws ExecException { - assertNotNull(t); - 
assertEquals(6, t.size()); - - assertTrue(t.get(0).getClass() == Integer.class); - assertTrue(t.get(1).getClass() == String.class); - assertTrue(t.get(2).getClass() == String.class); - assertTrue(t.get(3).getClass() == String.class); - assertTrue(t.get(4).getClass() == String.class); - assertTrue(t.get(5).getClass() == String.class); - - EmpDetail ed = eds.remove(t.get(0)); - assertNotNull(ed); - - assertEquals(ed.name, t.get(1)); - assertEquals(ed.dob, t.get(2)); - assertEquals(ed.mf, t.get(3)); - assertEquals(ed.country, t.get(4)); - assertEquals(ed.state, t.get(5)); - } - - private void addEmpDetail(Map empDetails, int id, String name, - String dob, String mf, String country, String state) { - EmpDetail ed = new EmpDetail(); - ed.name = name; - ed.dob = dob; - ed.mf = mf; - ed.country = country; - ed.state = state; - empDetails.put(id, ed); - } - - - - private void assertEmpDetail(Tuple t, Integer id, String name, String dob, String mf) - throws ExecException { - assertNotNull(t); - assertEquals(4, t.size()); - assertTrue(t.get(0).getClass() == Integer.class); - assertTrue(t.get(1).getClass() == String.class); - assertTrue(t.get(2).getClass() == String.class); - assertTrue(t.get(3).getClass() == String.class); - - assertEquals(id, t.get(0)); - assertEquals(name, t.get(1)); - assertEquals(dob, t.get(2)); - assertEquals(mf, t.get(3)); - } - - private void assertEmpDetail(Tuple t, String mf, String name) - throws ExecException { - assertNotNull(t); - assertEquals(2, t.size()); - assertTrue(t.get(0).getClass() == String.class); - assertTrue(t.get(1).getClass() == String.class); - - assertEquals(mf, t.get(0)); - assertEquals(name, t.get(1)); - } - - - - public void testLoadNonPartTable() throws Exception { - populateDataFile(); - { - PigServer server = new PigServer(ExecType.LOCAL, props); - UDFContext.getUDFContext().setClientSystemProps(); - server.setBatchOn(); - server - .registerQuery("A = load '" - + fqdataLocation - + "' using PigStorage(',') as (emp_id:int, 
emp_name:chararray, emp_dob:chararray, emp_sex:chararray);"); - server.registerQuery("store A into '" + NONPART_TABLE - + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation + "');"); - server.executeBatch(); - } - { - PigServer server = new PigServer(ExecType.LOCAL, props); - UDFContext.getUDFContext().setClientSystemProps(); - - server - .registerQuery("A = load '" - + fqexportLocation - + "' using org.apache.hcatalog.pig.HCatEximLoader();"); - Iterator XIter = server.openIterator("A"); - assertTrue(XIter.hasNext()); - Tuple t = XIter.next(); - assertEmpDetail(t, 237, "Krishna", "01/01/1990", "M"); - assertTrue(XIter.hasNext()); - t = XIter.next(); - assertEmpDetail(t, 238, "Kalpana", "01/01/2000", "F"); - assertTrue(XIter.hasNext()); - t = XIter.next(); - assertEmpDetail(t, 239, "Satya", "01/01/2001", "M"); - assertTrue(XIter.hasNext()); - t = XIter.next(); - assertEmpDetail(t, 240, "Kavya", "01/01/2002", "F"); - assertFalse(XIter.hasNext()); - } - } - - public void testLoadNonPartProjection() throws Exception { - populateDataFile(); - { - PigServer server = new PigServer(ExecType.LOCAL, props); - UDFContext.getUDFContext().setClientSystemProps(); - server.setBatchOn(); - server - .registerQuery("A = load '" - + fqdataLocation - + "' using PigStorage(',') as (emp_id:int, emp_name:chararray, emp_dob:chararray, emp_sex:chararray);"); - server.registerQuery("store A into '" + NONPART_TABLE - + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation + "');"); - server.executeBatch(); - } - { - PigServer server = new PigServer(ExecType.LOCAL, props); - UDFContext.getUDFContext().setClientSystemProps(); - - server - .registerQuery("A = load '" - + fqexportLocation - + "' using org.apache.hcatalog.pig.HCatEximLoader();"); - server.registerQuery("B = foreach A generate emp_sex, emp_name;"); - - Iterator XIter = server.openIterator("B"); - assertTrue(XIter.hasNext()); - Tuple t = XIter.next(); - assertEmpDetail(t, "M", "Krishna"); - 
assertTrue(XIter.hasNext()); - t = XIter.next(); - assertEmpDetail(t, "F", "Kalpana"); - assertTrue(XIter.hasNext()); - t = XIter.next(); - assertEmpDetail(t, "M", "Satya"); - assertTrue(XIter.hasNext()); - t = XIter.next(); - assertEmpDetail(t, "F", "Kavya"); - assertFalse(XIter.hasNext()); - } - } - - - public void testLoadMultiPartTable() throws Exception { - { - populateDataFile(); - PigServer server = new PigServer(ExecType.LOCAL, props); - UDFContext.getUDFContext().setClientSystemProps(); - server.setBatchOn(); - server - .registerQuery("A = load '" - + fqdataLocation + - "' using PigStorage(',') as (emp_id:int, emp_name:chararray, emp_dob:chararray, emp_sex:chararray, emp_country:chararray, emp_state:chararray);" - ); - server.registerQuery("INTN = FILTER A BY emp_country == 'IN' AND emp_state == 'TN';"); - server.registerQuery("INKA = FILTER A BY emp_country == 'IN' AND emp_state == 'KA';"); - server.registerQuery("USTN = FILTER A BY emp_country == 'US' AND emp_state == 'TN';"); - server.registerQuery("USKA = FILTER A BY emp_country == 'US' AND emp_state == 'KA';"); - server.registerQuery("store INTN into '" + PARTITIONED_TABLE - + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation + - "', 'emp_country=in,emp_state=tn');"); - server.registerQuery("store INKA into '" + PARTITIONED_TABLE - + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation + - "', 'emp_country=in,emp_state=ka');"); - server.registerQuery("store USTN into '" + PARTITIONED_TABLE - + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation + - "', 'emp_country=us,emp_state=tn');"); - server.registerQuery("store USKA into '" + PARTITIONED_TABLE - + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation + - "', 'emp_country=us,emp_state=ka');"); - server.executeBatch(); - } - { - PigServer server = new PigServer(ExecType.LOCAL, props); - UDFContext.getUDFContext().setClientSystemProps(); - - server - .registerQuery("A = load '" - 
+ fqexportLocation - + "' using org.apache.hcatalog.pig.HCatEximLoader() " - //+ "as (emp_id:int, emp_name:chararray, emp_dob:chararray, emp_sex:chararray, emp_country:chararray, emp_state:chararray);"); - + ";"); - - Iterator XIter = server.openIterator("A"); - - Map empDetails = new TreeMap(); - addEmpDetail(empDetails, 237, "Krishna", "01/01/1990", "M", "in", "tn"); - addEmpDetail(empDetails, 238, "Kalpana", "01/01/2000", "F", "in", "ka"); - addEmpDetail(empDetails, 239, "Satya", "01/01/2001", "M", "us", "tn"); - addEmpDetail(empDetails, 240, "Kavya", "01/01/2002", "F", "us", "ka"); - - while(XIter.hasNext()) { - Tuple t = XIter.next(); - assertNotSame(0, empDetails.size()); - assertEmpDetail(t, empDetails); - } - assertEquals(0, empDetails.size()); - } - } - - public void testLoadMultiPartFilter() throws Exception { - { - populateDataFile(); - PigServer server = new PigServer(ExecType.LOCAL, props); - UDFContext.getUDFContext().setClientSystemProps(); - server.setBatchOn(); - server - .registerQuery("A = load '" - + fqdataLocation + - "' using PigStorage(',') as (emp_id:int, emp_name:chararray, emp_dob:chararray, emp_sex:chararray, emp_country:chararray, emp_state:chararray);" - ); - server.registerQuery("INTN = FILTER A BY emp_country == 'IN' AND emp_state == 'TN';"); - server.registerQuery("INKA = FILTER A BY emp_country == 'IN' AND emp_state == 'KA';"); - server.registerQuery("USTN = FILTER A BY emp_country == 'US' AND emp_state == 'TN';"); - server.registerQuery("USKA = FILTER A BY emp_country == 'US' AND emp_state == 'KA';"); - server.registerQuery("store INTN into '" + PARTITIONED_TABLE - + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation + - "', 'emp_country=in,emp_state=tn');"); - server.registerQuery("store INKA into '" + PARTITIONED_TABLE - + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation + - "', 'emp_country=in,emp_state=ka');"); - server.registerQuery("store USTN into '" + PARTITIONED_TABLE - + "' using 
org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation + - "', 'emp_country=us,emp_state=tn');"); - server.registerQuery("store USKA into '" + PARTITIONED_TABLE - + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation + - "', 'emp_country=us,emp_state=ka');"); - server.executeBatch(); - } - { - PigServer server = new PigServer(ExecType.LOCAL, props); - UDFContext.getUDFContext().setClientSystemProps(); - - server - .registerQuery("A = load '" - + fqexportLocation - + "' using org.apache.hcatalog.pig.HCatEximLoader() " - + ";"); - server.registerQuery("B = filter A by emp_state == 'ka';"); - - Iterator XIter = server.openIterator("B"); - - Map empDetails = new TreeMap(); - addEmpDetail(empDetails, 238, "Kalpana", "01/01/2000", "F", "in", "ka"); - addEmpDetail(empDetails, 240, "Kavya", "01/01/2002", "F", "us", "ka"); - - while(XIter.hasNext()) { - Tuple t = XIter.next(); - assertNotSame(0, empDetails.size()); - assertEmpDetail(t, empDetails); - } - assertEquals(0, empDetails.size()); - } - } - - -} diff --git hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatEximStorer.java.broken hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatEximStorer.java.broken deleted file mode 100644 index 5424269..0000000 --- hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatEximStorer.java.broken +++ /dev/null @@ -1,395 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.pig; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.Properties; -import java.util.Set; -import java.util.TreeSet; - -import junit.framework.TestCase; - -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hcatalog.MiniCluster; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.common.HCatUtil; -import org.apache.hcatalog.data.schema.HCatFieldSchema; -import org.apache.hcatalog.data.schema.HCatSchemaUtils; -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.metastore.api.Partition; -import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.hadoop.hive.ql.parse.EximUtil; -import org.apache.hadoop.hive.serde.Constants; -import org.apache.pig.ExecType; -import org.apache.pig.PigServer; -import org.apache.pig.impl.logicalLayer.FrontendException; -import org.apache.pig.impl.util.UDFContext; - -public class TestHCatEximStorer extends TestCase { - - private static final String NONPART_TABLE = "junit_unparted"; - private static final String PARTITIONED_TABLE = "junit_parted"; - private static MiniCluster cluster = MiniCluster.buildCluster(); - - private static final String dataLocation = "/tmp/data"; - private static String fqdataLocation; - private static final String exportLocation = "/tmp/export"; - private static String fqexportLocation; - - private static Properties props; - - private void cleanup() throws IOException { - 
MiniCluster.deleteFile(cluster, dataLocation); - MiniCluster.deleteFile(cluster, exportLocation); - } - - @Override - protected void setUp() throws Exception { - props = new Properties(); - props.setProperty("fs.default.name", cluster.getProperties().getProperty("fs.default.name")); - System.out.println("Filesystem class : " + cluster.getFileSystem().getClass().getName() + ", fs.default.name : " + props.getProperty("fs.default.name")); - fqdataLocation = cluster.getProperties().getProperty("fs.default.name") + dataLocation; - fqexportLocation = cluster.getProperties().getProperty("fs.default.name") + exportLocation; - System.out.println("FQ Data Location :" + fqdataLocation); - System.out.println("FQ Export Location :" + fqexportLocation); - cleanup(); - } - - @Override - protected void tearDown() throws Exception { - cleanup(); - } - - private void populateDataFile() throws IOException { - MiniCluster.deleteFile(cluster, dataLocation); - String[] input = new String[] { - "237,Krishna,01/01/1990,M,IN,TN", - "238,Kalpana,01/01/2000,F,IN,KA", - "239,Satya,01/01/2001,M,US,TN", - "240,Kavya,01/01/2002,F,US,KA" - }; - MiniCluster.createInputFile(cluster, dataLocation, input); - } - - public void testStoreNonPartTable() throws Exception { - populateDataFile(); - PigServer server = new PigServer(ExecType.LOCAL, props); - UDFContext.getUDFContext().setClientSystemProps(); - server.setBatchOn(); - server.registerQuery("A = load '" + fqdataLocation + "' using PigStorage(',') as (emp_id:int, emp_name:chararray, emp_dob:chararray, emp_sex:chararray);"); - server.registerQuery("store A into '" + NONPART_TABLE - + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation + "');"); - server.executeBatch(); - - FileSystem fs = cluster.getFileSystem(); - - System.out.println("Filesystem class : " + cluster.getFileSystem().getClass().getName() + ", fs.default.name : " + props.getProperty("fs.default.name")); - - Map.Entry> metadata = EximUtil.readMetaData(fs, new 
Path(exportLocation, "_metadata")); - Table table = metadata.getKey(); - List partitions = metadata.getValue(); - - List columns = new ArrayList(); - columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_id", - Constants.INT_TYPE_NAME, ""))); - columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_name", - Constants.STRING_TYPE_NAME, ""))); - columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_dob", - Constants.STRING_TYPE_NAME, ""))); - columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_sex", - Constants.STRING_TYPE_NAME, ""))); - - - assertEquals("default", table.getDbName()); - assertEquals(NONPART_TABLE, table.getTableName()); - assertTrue(EximUtil.schemaCompare(table.getSd().getCols(), - HCatUtil.getFieldSchemaList(columns))); - assertEquals("org.apache.hcatalog.rcfile.RCFileInputDriver", - table.getParameters().get(HCatConstants.HCAT_ISD_CLASS)); - assertEquals("org.apache.hcatalog.rcfile.RCFileOutputDriver", - table.getParameters().get(HCatConstants.HCAT_OSD_CLASS)); - assertEquals("org.apache.hadoop.hive.ql.io.RCFileInputFormat", - table.getSd().getInputFormat()); - assertEquals("org.apache.hadoop.hive.ql.io.RCFileOutputFormat", - table.getSd().getOutputFormat()); - assertEquals("org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe", - table.getSd().getSerdeInfo().getSerializationLib()); - assertEquals(0, table.getPartitionKeys().size()); - - assertEquals(0, partitions.size()); - } - - public void testStorePartTable() throws Exception { - populateDataFile(); - PigServer server = new PigServer(ExecType.LOCAL, props); - UDFContext.getUDFContext().setClientSystemProps(); - server.setBatchOn(); - server.registerQuery("A = load '" + fqdataLocation + "' using PigStorage(',') as (emp_id:int, emp_name:chararray, emp_dob:chararray, emp_sex:chararray);"); - server.registerQuery("store A into '" + PARTITIONED_TABLE - + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation + "', 
'emp_country=in,emp_state=tn');"); - server.executeBatch(); - - FileSystem fs = cluster.getFileSystem(); - - System.out.println("Filesystem class : " + cluster.getFileSystem().getClass().getName() + ", fs.default.name : " + props.getProperty("fs.default.name")); - - Map.Entry> metadata = EximUtil.readMetaData(fs, new Path(exportLocation, "_metadata")); - Table table = metadata.getKey(); - List partitions = metadata.getValue(); - - List columns = new ArrayList(); - columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_id", - Constants.INT_TYPE_NAME, ""))); - columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_name", - Constants.STRING_TYPE_NAME, ""))); - columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_dob", - Constants.STRING_TYPE_NAME, ""))); - columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_sex", - Constants.STRING_TYPE_NAME, ""))); - - - assertEquals("default", table.getDbName()); - assertEquals(PARTITIONED_TABLE, table.getTableName()); - assertTrue(EximUtil.schemaCompare(table.getSd().getCols(), - HCatUtil.getFieldSchemaList(columns))); - assertEquals("org.apache.hcatalog.rcfile.RCFileInputDriver", - table.getParameters().get(HCatConstants.HCAT_ISD_CLASS)); - assertEquals("org.apache.hcatalog.rcfile.RCFileOutputDriver", - table.getParameters().get(HCatConstants.HCAT_OSD_CLASS)); - assertEquals("org.apache.hadoop.hive.ql.io.RCFileInputFormat", - table.getSd().getInputFormat()); - assertEquals("org.apache.hadoop.hive.ql.io.RCFileOutputFormat", - table.getSd().getOutputFormat()); - assertEquals("org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe", - table.getSd().getSerdeInfo().getSerializationLib()); - assertEquals(2, table.getPartitionKeys().size()); - List partSchema = table.getPartitionKeys(); - assertEquals("emp_country", partSchema.get(0).getName()); - assertEquals("emp_state", partSchema.get(1).getName()); - - assertEquals(1, partitions.size()); - Partition partition = partitions.get(0); 
- assertEquals("in", partition.getValues().get(0)); - assertEquals("tn", partition.getValues().get(1)); - assertEquals("org.apache.hcatalog.rcfile.RCFileInputDriver", - partition.getParameters().get(HCatConstants.HCAT_ISD_CLASS)); - assertEquals("org.apache.hcatalog.rcfile.RCFileOutputDriver", - partition.getParameters().get(HCatConstants.HCAT_OSD_CLASS)); - } - - public void testStorePartTable_state_country() throws Exception { - populateDataFile(); - PigServer server = new PigServer(ExecType.LOCAL, props); - UDFContext.getUDFContext().setClientSystemProps(); - server.setBatchOn(); - server.registerQuery("A = load '" + fqdataLocation + "' using PigStorage(',') as (emp_id:int, emp_name:chararray, emp_dob:chararray, emp_sex:chararray);"); - server.registerQuery("store A into '" + PARTITIONED_TABLE - + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation + "', 'emp_state=tn,emp_country=in');"); - server.executeBatch(); - - FileSystem fs = cluster.getFileSystem(); - - System.out.println("Filesystem class : " + cluster.getFileSystem().getClass().getName() + ", fs.default.name : " + props.getProperty("fs.default.name")); - - Map.Entry> metadata = EximUtil.readMetaData(fs, new Path(exportLocation, "_metadata")); - Table table = metadata.getKey(); - List partitions = metadata.getValue(); - - List columns = new ArrayList(); - columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_id", - Constants.INT_TYPE_NAME, ""))); - columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_name", - Constants.STRING_TYPE_NAME, ""))); - columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_dob", - Constants.STRING_TYPE_NAME, ""))); - columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_sex", - Constants.STRING_TYPE_NAME, ""))); - - - assertEquals("default", table.getDbName()); - assertEquals(PARTITIONED_TABLE, table.getTableName()); - assertTrue(EximUtil.schemaCompare(table.getSd().getCols(), - 
HCatUtil.getFieldSchemaList(columns))); - assertEquals("org.apache.hcatalog.rcfile.RCFileInputDriver", - table.getParameters().get(HCatConstants.HCAT_ISD_CLASS)); - assertEquals("org.apache.hcatalog.rcfile.RCFileOutputDriver", - table.getParameters().get(HCatConstants.HCAT_OSD_CLASS)); - assertEquals("org.apache.hadoop.hive.ql.io.RCFileInputFormat", - table.getSd().getInputFormat()); - assertEquals("org.apache.hadoop.hive.ql.io.RCFileOutputFormat", - table.getSd().getOutputFormat()); - assertEquals("org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe", - table.getSd().getSerdeInfo().getSerializationLib()); - assertEquals(2, table.getPartitionKeys().size()); - List partSchema = table.getPartitionKeys(); - assertEquals("emp_state", partSchema.get(0).getName()); - assertEquals("emp_country", partSchema.get(1).getName()); - - assertEquals(1, partitions.size()); - Partition partition = partitions.get(0); - assertEquals("tn", partition.getValues().get(0)); - assertEquals("in", partition.getValues().get(1)); - assertEquals("org.apache.hcatalog.rcfile.RCFileInputDriver", - partition.getParameters().get(HCatConstants.HCAT_ISD_CLASS)); - assertEquals("org.apache.hcatalog.rcfile.RCFileOutputDriver", - partition.getParameters().get(HCatConstants.HCAT_OSD_CLASS)); - } - - public void testStoreNonPartCompatSchemaTable() throws Exception { - populateDataFile(); - PigServer server = new PigServer(ExecType.LOCAL, props); - UDFContext.getUDFContext().setClientSystemProps(); - server.setBatchOn(); - server.registerQuery("A = load '" + fqdataLocation + "' using PigStorage(',') as (emp_id:int, emp_name:chararray, emp_dob:chararray, emp_sex:chararray);"); - server.registerQuery("store A into '" + NONPART_TABLE - + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation + "', '', 'id:int, name:chararray, dob:chararray, sex:chararray');"); - server.executeBatch(); - - FileSystem fs = cluster.getFileSystem(); - - System.out.println("Filesystem class : " + 
cluster.getFileSystem().getClass().getName() + ", fs.default.name : " + props.getProperty("fs.default.name")); - - Map.Entry> metadata = EximUtil.readMetaData(fs, new Path(exportLocation, "_metadata")); - Table table = metadata.getKey(); - List partitions = metadata.getValue(); - - List columns = new ArrayList(); - columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("id", - Constants.INT_TYPE_NAME, ""))); - columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("name", - Constants.STRING_TYPE_NAME, ""))); - columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("dob", - Constants.STRING_TYPE_NAME, ""))); - columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("sex", - Constants.STRING_TYPE_NAME, ""))); - - - assertEquals("default", table.getDbName()); - assertEquals(NONPART_TABLE, table.getTableName()); - assertTrue(EximUtil.schemaCompare(table.getSd().getCols(), - HCatUtil.getFieldSchemaList(columns))); - assertEquals("org.apache.hcatalog.rcfile.RCFileInputDriver", - table.getParameters().get(HCatConstants.HCAT_ISD_CLASS)); - assertEquals("org.apache.hcatalog.rcfile.RCFileOutputDriver", - table.getParameters().get(HCatConstants.HCAT_OSD_CLASS)); - assertEquals("org.apache.hadoop.hive.ql.io.RCFileInputFormat", - table.getSd().getInputFormat()); - assertEquals("org.apache.hadoop.hive.ql.io.RCFileOutputFormat", - table.getSd().getOutputFormat()); - assertEquals("org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe", - table.getSd().getSerdeInfo().getSerializationLib()); - assertEquals(0, table.getPartitionKeys().size()); - - assertEquals(0, partitions.size()); - } - - public void testStoreNonPartNonCompatSchemaTable() throws Exception { - populateDataFile(); - PigServer server = new PigServer(ExecType.LOCAL, props); - UDFContext.getUDFContext().setClientSystemProps(); - server.setBatchOn(); - server.registerQuery("A = load '" + fqdataLocation + "' using PigStorage(',') as (emp_id:int, emp_name:chararray, emp_dob:chararray, 
emp_sex:chararray);"); - server.registerQuery("store A into '" + NONPART_TABLE - + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation + "', '', 'id:int, name:chararray, dob:chararray, sex:int');"); - try { - server.executeBatch(); - fail("Expected exception not thrown"); - } catch (FrontendException e) { - } - } - - public void testStoreMultiPartTable() throws Exception { - populateDataFile(); - PigServer server = new PigServer(ExecType.LOCAL, props); - UDFContext.getUDFContext().setClientSystemProps(); - server.setBatchOn(); - server.registerQuery("A = load '" + fqdataLocation + "' using PigStorage(',') as (emp_id:int, emp_name:chararray, emp_dob:chararray, emp_sex:chararray, emp_country:chararray, emp_state:chararray);"); - server.registerQuery("INTN = FILTER A BY emp_country == 'IN' AND emp_state == 'TN';"); - server.registerQuery("INKA = FILTER A BY emp_country == 'IN' AND emp_state == 'KA';"); - server.registerQuery("USTN = FILTER A BY emp_country == 'US' AND emp_state == 'TN';"); - server.registerQuery("USKA = FILTER A BY emp_country == 'US' AND emp_state == 'KA';"); - server.registerQuery("store INTN into '" + PARTITIONED_TABLE - + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation + "', 'emp_country=in,emp_state=tn');"); - server.registerQuery("store INKA into '" + PARTITIONED_TABLE - + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation + "', 'emp_country=in,emp_state=ka');"); - server.registerQuery("store USTN into '" + PARTITIONED_TABLE - + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation + "', 'emp_country=us,emp_state=tn');"); - server.registerQuery("store USKA into '" + PARTITIONED_TABLE - + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation + "', 'emp_country=us,emp_state=ka');"); - server.executeBatch(); - - FileSystem fs = cluster.getFileSystem(); - - System.out.println("Filesystem class : " + cluster.getFileSystem().getClass().getName() + ", 
fs.default.name : " + props.getProperty("fs.default.name")); - - Map.Entry> metadata = EximUtil.readMetaData(fs, new Path(exportLocation, "_metadata")); - Table table = metadata.getKey(); - List partitions = metadata.getValue(); - - List columns = new ArrayList(); - columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_id", - Constants.INT_TYPE_NAME, ""))); - columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_name", - Constants.STRING_TYPE_NAME, ""))); - columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_dob", - Constants.STRING_TYPE_NAME, ""))); - columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_sex", - Constants.STRING_TYPE_NAME, ""))); - - - assertEquals("default", table.getDbName()); - assertEquals(PARTITIONED_TABLE, table.getTableName()); - assertTrue(EximUtil.schemaCompare(table.getSd().getCols(), - HCatUtil.getFieldSchemaList(columns))); - assertEquals("org.apache.hcatalog.rcfile.RCFileInputDriver", - table.getParameters().get(HCatConstants.HCAT_ISD_CLASS)); - assertEquals("org.apache.hcatalog.rcfile.RCFileOutputDriver", - table.getParameters().get(HCatConstants.HCAT_OSD_CLASS)); - assertEquals("org.apache.hadoop.hive.ql.io.RCFileInputFormat", - table.getSd().getInputFormat()); - assertEquals("org.apache.hadoop.hive.ql.io.RCFileOutputFormat", - table.getSd().getOutputFormat()); - assertEquals("org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe", - table.getSd().getSerdeInfo().getSerializationLib()); - assertEquals(2, table.getPartitionKeys().size()); - List partSchema = table.getPartitionKeys(); - assertEquals("emp_country", partSchema.get(0).getName()); - assertEquals("emp_state", partSchema.get(1).getName()); - - assertEquals(4, partitions.size()); - Set parts = new TreeSet(); - parts.add("in,tn"); - parts.add("in,ka"); - parts.add("us,tn"); - parts.add("us,ka"); - - for (Partition partition : partitions) { - assertEquals("org.apache.hcatalog.rcfile.RCFileInputDriver", - 
partition.getParameters().get(HCatConstants.HCAT_ISD_CLASS)); - assertEquals("org.apache.hcatalog.rcfile.RCFileOutputDriver", - partition.getParameters().get(HCatConstants.HCAT_OSD_CLASS)); - assertTrue(parts.remove(partition.getValues().get(0) + "," + partition.getValues().get(1))); - } - assertEquals(0, parts.size()); - } -} diff --git hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatLoader.java hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatLoader.java deleted file mode 100644 index 2644459..0000000 --- hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatLoader.java +++ /dev/null @@ -1,451 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.hcatalog.pig; - -import java.io.File; -import java.io.IOException; -import java.io.RandomAccessFile; -import java.util.ArrayList; -import java.util.Collection; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Properties; - -import junit.framework.TestCase; - -import org.apache.hadoop.fs.FileUtil; -import org.apache.hadoop.hive.cli.CliSessionState; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.CommandNeedRetryException; -import org.apache.hadoop.hive.ql.Driver; -import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hcatalog.HcatTestUtils; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.data.Pair; -import org.apache.pig.ExecType; -import org.apache.pig.PigServer; -import org.apache.pig.ResourceStatistics; -import org.apache.pig.data.DataType; -import org.apache.pig.data.Tuple; -import org.apache.pig.impl.logicalLayer.schema.Schema; -import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema; - -public class TestHCatLoader extends TestCase { - private static final String TEST_DATA_DIR = - "/tmp/build/test/data/" + TestHCatLoader.class.getCanonicalName(); - private static final String TEST_WAREHOUSE_DIR = TEST_DATA_DIR + "/warehouse"; - private static final String BASIC_FILE_NAME = TEST_DATA_DIR + "/basic.input.data"; - private static final String COMPLEX_FILE_NAME = TEST_DATA_DIR + "/complex.input.data"; - - private static final String BASIC_TABLE = "junit_unparted_basic"; - private static final String COMPLEX_TABLE = "junit_unparted_complex"; - private static final String PARTITIONED_TABLE = "junit_parted_basic"; - private static final String SPECIFIC_SIZE_TABLE = "junit_specific_size"; - private static Driver driver; - - private static int guardTestCount = 6; // ugh, instantiate using introspection in guardedSetupBeforeClass - private static 
boolean setupHasRun = false; - - - private static Map> basicInputData; - - protected String storageFormat() { - return "RCFILE tblproperties('hcat.isd'='org.apache.hcatalog.rcfile.RCFileInputDriver'," + - "'hcat.osd'='org.apache.hcatalog.rcfile.RCFileOutputDriver')"; - } - - private void dropTable(String tablename) throws IOException, CommandNeedRetryException { - driver.run("drop table " + tablename); - } - - private void createTable(String tablename, String schema, String partitionedBy) throws IOException, CommandNeedRetryException { - String createTable; - createTable = "create table " + tablename + "(" + schema + ") "; - if ((partitionedBy != null) && (!partitionedBy.trim().isEmpty())) { - createTable = createTable + "partitioned by (" + partitionedBy + ") "; - } - createTable = createTable + "stored as " +storageFormat(); - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table. [" + createTable + "], return code from hive driver : [" + retCode + "]"); - } - } - - private void createTable(String tablename, String schema) throws IOException, CommandNeedRetryException { - createTable(tablename, schema, null); - } - - protected void guardedSetUpBeforeClass() throws Exception { - if (!setupHasRun) { - setupHasRun = true; - } else { - return; - } - - File f = new File(TEST_WAREHOUSE_DIR); - if (f.exists()) { - FileUtil.fullyDelete(f); - } - new File(TEST_WAREHOUSE_DIR).mkdirs(); - - HiveConf hiveConf = new HiveConf(this.getClass()); - hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); - hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); - hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); - hiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, TEST_WAREHOUSE_DIR); - driver = new Driver(hiveConf); - SessionState.start(new CliSessionState(hiveConf)); - - cleanup(); - - createTable(BASIC_TABLE, "a int, b string"); - createTable(COMPLEX_TABLE, - "name 
string, studentid int, " - + "contact struct, " - + "currently_registered_courses array, " - + "current_grades map, " - + "phnos array>"); - - createTable(PARTITIONED_TABLE, "a int, b string", "bkt string"); - createTable(SPECIFIC_SIZE_TABLE, "a int, b string"); - - int LOOP_SIZE = 3; - String[] input = new String[LOOP_SIZE * LOOP_SIZE]; - basicInputData = new HashMap>(); - int k = 0; - for (int i = 1; i <= LOOP_SIZE; i++) { - String si = i + ""; - for (int j = 1; j <= LOOP_SIZE; j++) { - String sj = "S" + j + "S"; - input[k] = si + "\t" + sj; - basicInputData.put(k, new Pair(i, sj)); - k++; - } - } - HcatTestUtils.createTestDataFile(BASIC_FILE_NAME, input); - HcatTestUtils.createTestDataFile(COMPLEX_FILE_NAME, - new String[]{ - //"Henry Jekyll\t42\t(415-253-6367,hjekyll@contemporary.edu.uk)\t{(PHARMACOLOGY),(PSYCHIATRY)},[PHARMACOLOGY#A-,PSYCHIATRY#B+],{(415-253-6367,cell),(408-253-6367,landline)}", - //"Edward Hyde\t1337\t(415-253-6367,anonymous@b44chan.org)\t{(CREATIVE_WRITING),(COPYRIGHT_LAW)},[CREATIVE_WRITING#A+,COPYRIGHT_LAW#D],{(415-253-6367,cell),(408-253-6367,landline)}", - } - ); - - PigServer server = new PigServer(ExecType.LOCAL); - server.setBatchOn(); - server.registerQuery("A = load '" + BASIC_FILE_NAME + "' as (a:int, b:chararray);"); - - server.registerQuery("store A into '" + BASIC_TABLE + "' using org.apache.hcatalog.pig.HCatStorer();"); - server.registerQuery("store A into '" + SPECIFIC_SIZE_TABLE + "' using org.apache.hcatalog.pig.HCatStorer();"); - server.registerQuery("B = foreach A generate a,b;"); - server.registerQuery("B2 = filter B by a < 2;"); - server.registerQuery("store B2 into '" + PARTITIONED_TABLE + "' using org.apache.hcatalog.pig.HCatStorer('bkt=0');"); - - server.registerQuery("C = foreach A generate a,b;"); - server.registerQuery("C2 = filter C by a >= 2;"); - server.registerQuery("store C2 into '" + PARTITIONED_TABLE + "' using org.apache.hcatalog.pig.HCatStorer('bkt=1');"); - - server.registerQuery("D = load '" + 
COMPLEX_FILE_NAME + "' as (name:chararray, studentid:int, contact:tuple(phno:chararray,email:chararray), currently_registered_courses:bag{innertup:tuple(course:chararray)}, current_grades:map[ ] , phnos :bag{innertup:tuple(phno:chararray,type:chararray)});"); - server.registerQuery("store D into '" + COMPLEX_TABLE + "' using org.apache.hcatalog.pig.HCatStorer();"); - server.executeBatch(); - - } - - private void cleanup() throws IOException, CommandNeedRetryException { - dropTable(BASIC_TABLE); - dropTable(COMPLEX_TABLE); - dropTable(PARTITIONED_TABLE); - dropTable(SPECIFIC_SIZE_TABLE); - } - - protected void guardedTearDownAfterClass() throws Exception { - guardTestCount--; - if (guardTestCount > 0) { - return; - } - cleanup(); - } - - @Override - protected void setUp() throws Exception { - guardedSetUpBeforeClass(); - } - - @Override - protected void tearDown() throws Exception { - guardedTearDownAfterClass(); - } - - public void testSchemaLoadBasic() throws IOException { - - PigServer server = new PigServer(ExecType.LOCAL); - - // test that schema was loaded correctly - server.registerQuery("X = load '" + BASIC_TABLE + "' using org.apache.hcatalog.pig.HCatLoader();"); - Schema dumpedXSchema = server.dumpSchema("X"); - List Xfields = dumpedXSchema.getFields(); - assertEquals(2, Xfields.size()); - assertTrue(Xfields.get(0).alias.equalsIgnoreCase("a")); - assertTrue(Xfields.get(0).type == DataType.INTEGER); - assertTrue(Xfields.get(1).alias.equalsIgnoreCase("b")); - assertTrue(Xfields.get(1).type == DataType.CHARARRAY); - - } - - public void testReadDataBasic() throws IOException { - PigServer server = new PigServer(ExecType.LOCAL); - - server.registerQuery("X = load '" + BASIC_TABLE + "' using org.apache.hcatalog.pig.HCatLoader();"); - Iterator XIter = server.openIterator("X"); - int numTuplesRead = 0; - while (XIter.hasNext()) { - Tuple t = XIter.next(); - assertEquals(2, t.size()); - assertTrue(t.get(0).getClass() == Integer.class); - 
assertTrue(t.get(1).getClass() == String.class); - assertEquals(t.get(0), basicInputData.get(numTuplesRead).first); - assertEquals(t.get(1), basicInputData.get(numTuplesRead).second); - numTuplesRead++; - } - assertEquals(basicInputData.size(), numTuplesRead); - } - - public void testSchemaLoadComplex() throws IOException { - - PigServer server = new PigServer(ExecType.LOCAL); - - // test that schema was loaded correctly - server.registerQuery("K = load '" + COMPLEX_TABLE + "' using org.apache.hcatalog.pig.HCatLoader();"); - Schema dumpedKSchema = server.dumpSchema("K"); - List Kfields = dumpedKSchema.getFields(); - assertEquals(6, Kfields.size()); - - assertEquals(DataType.CHARARRAY, Kfields.get(0).type); - assertEquals("name", Kfields.get(0).alias.toLowerCase()); - - assertEquals(DataType.INTEGER, Kfields.get(1).type); - assertEquals("studentid", Kfields.get(1).alias.toLowerCase()); - - assertEquals(DataType.TUPLE, Kfields.get(2).type); - assertEquals("contact", Kfields.get(2).alias.toLowerCase()); - { - assertNotNull(Kfields.get(2).schema); - assertTrue(Kfields.get(2).schema.getFields().size() == 2); - assertTrue(Kfields.get(2).schema.getFields().get(0).type == DataType.CHARARRAY); - assertTrue(Kfields.get(2).schema.getFields().get(0).alias.equalsIgnoreCase("phno")); - assertTrue(Kfields.get(2).schema.getFields().get(1).type == DataType.CHARARRAY); - assertTrue(Kfields.get(2).schema.getFields().get(1).alias.equalsIgnoreCase("email")); - } - assertEquals(DataType.BAG, Kfields.get(3).type); - assertEquals("currently_registered_courses", Kfields.get(3).alias.toLowerCase()); - { - assertNotNull(Kfields.get(3).schema); - assertEquals(1, Kfields.get(3).schema.getFields().size()); - assertEquals(DataType.TUPLE, Kfields.get(3).schema.getFields().get(0).type); - assertNotNull(Kfields.get(3).schema.getFields().get(0).schema); - assertEquals(1, Kfields.get(3).schema.getFields().get(0).schema.getFields().size()); - assertEquals(DataType.CHARARRAY, 
Kfields.get(3).schema.getFields().get(0).schema.getFields().get(0).type); - // assertEquals("course",Kfields.get(3).schema.getFields().get(0).schema.getFields().get(0).alias.toLowerCase()); - // commented out, because the name becomes "innerfield" by default - we call it "course" in pig, - // but in the metadata, it'd be anonymous, so this would be autogenerated, which is fine - } - assertEquals(DataType.MAP, Kfields.get(4).type); - assertEquals("current_grades", Kfields.get(4).alias.toLowerCase()); - assertEquals(DataType.BAG, Kfields.get(5).type); - assertEquals("phnos", Kfields.get(5).alias.toLowerCase()); - { - assertNotNull(Kfields.get(5).schema); - assertEquals(1, Kfields.get(5).schema.getFields().size()); - assertEquals(DataType.TUPLE, Kfields.get(5).schema.getFields().get(0).type); - assertNotNull(Kfields.get(5).schema.getFields().get(0).schema); - assertTrue(Kfields.get(5).schema.getFields().get(0).schema.getFields().size() == 2); - assertEquals(DataType.CHARARRAY, Kfields.get(5).schema.getFields().get(0).schema.getFields().get(0).type); - assertEquals("phno", Kfields.get(5).schema.getFields().get(0).schema.getFields().get(0).alias.toLowerCase()); - assertEquals(DataType.CHARARRAY, Kfields.get(5).schema.getFields().get(0).schema.getFields().get(1).type); - assertEquals("type", Kfields.get(5).schema.getFields().get(0).schema.getFields().get(1).alias.toLowerCase()); - } - - } - - public void testReadPartitionedBasic() throws IOException, CommandNeedRetryException { - PigServer server = new PigServer(ExecType.LOCAL); - - driver.run("select * from " + PARTITIONED_TABLE); - ArrayList valuesReadFromHiveDriver = new ArrayList(); - driver.getResults(valuesReadFromHiveDriver); - assertEquals(basicInputData.size(), valuesReadFromHiveDriver.size()); - - server.registerQuery("W = load '" + PARTITIONED_TABLE + "' using org.apache.hcatalog.pig.HCatLoader();"); - Schema dumpedWSchema = server.dumpSchema("W"); - List Wfields = dumpedWSchema.getFields(); - assertEquals(3, 
Wfields.size()); - assertTrue(Wfields.get(0).alias.equalsIgnoreCase("a")); - assertTrue(Wfields.get(0).type == DataType.INTEGER); - assertTrue(Wfields.get(1).alias.equalsIgnoreCase("b")); - assertTrue(Wfields.get(1).type == DataType.CHARARRAY); - assertTrue(Wfields.get(2).alias.equalsIgnoreCase("bkt")); - assertTrue(Wfields.get(2).type == DataType.CHARARRAY); - - Iterator WIter = server.openIterator("W"); - Collection> valuesRead = new ArrayList>(); - while (WIter.hasNext()) { - Tuple t = WIter.next(); - assertTrue(t.size() == 3); - assertTrue(t.get(0).getClass() == Integer.class); - assertTrue(t.get(1).getClass() == String.class); - assertTrue(t.get(2).getClass() == String.class); - valuesRead.add(new Pair((Integer) t.get(0), (String) t.get(1))); - if ((Integer) t.get(0) < 2) { - assertEquals("0", t.get(2)); - } else { - assertEquals("1", t.get(2)); - } - } - assertEquals(valuesReadFromHiveDriver.size(), valuesRead.size()); - - server.registerQuery("P1 = load '" + PARTITIONED_TABLE + "' using org.apache.hcatalog.pig.HCatLoader();"); - server.registerQuery("P1filter = filter P1 by bkt == '0';"); - Iterator P1Iter = server.openIterator("P1filter"); - int count1 = 0; - while (P1Iter.hasNext()) { - Tuple t = P1Iter.next(); - - assertEquals("0", t.get(2)); - assertEquals(1, t.get(0)); - count1++; - } - assertEquals(3, count1); - - server.registerQuery("P2 = load '" + PARTITIONED_TABLE + "' using org.apache.hcatalog.pig.HCatLoader();"); - server.registerQuery("P2filter = filter P2 by bkt == '1';"); - Iterator P2Iter = server.openIterator("P2filter"); - int count2 = 0; - while (P2Iter.hasNext()) { - Tuple t = P2Iter.next(); - - assertEquals("1", t.get(2)); - assertTrue(((Integer) t.get(0)) > 1); - count2++; - } - assertEquals(6, count2); - } - - public void testProjectionsBasic() throws IOException { - - PigServer server = new PigServer(ExecType.LOCAL); - - // projections are handled by using generate, not "as" on the Load - - server.registerQuery("Y1 = load '" + 
BASIC_TABLE + "' using org.apache.hcatalog.pig.HCatLoader();"); - server.registerQuery("Y2 = foreach Y1 generate a;"); - server.registerQuery("Y3 = foreach Y1 generate b,a;"); - Schema dumpedY2Schema = server.dumpSchema("Y2"); - Schema dumpedY3Schema = server.dumpSchema("Y3"); - List Y2fields = dumpedY2Schema.getFields(); - List Y3fields = dumpedY3Schema.getFields(); - assertEquals(1, Y2fields.size()); - assertEquals("a", Y2fields.get(0).alias.toLowerCase()); - assertEquals(DataType.INTEGER, Y2fields.get(0).type); - assertEquals(2, Y3fields.size()); - assertEquals("b", Y3fields.get(0).alias.toLowerCase()); - assertEquals(DataType.CHARARRAY, Y3fields.get(0).type); - assertEquals("a", Y3fields.get(1).alias.toLowerCase()); - assertEquals(DataType.INTEGER, Y3fields.get(1).type); - - int numTuplesRead = 0; - Iterator Y2Iter = server.openIterator("Y2"); - while (Y2Iter.hasNext()) { - Tuple t = Y2Iter.next(); - assertEquals(t.size(), 1); - assertTrue(t.get(0).getClass() == Integer.class); - assertEquals(t.get(0), basicInputData.get(numTuplesRead).first); - numTuplesRead++; - } - numTuplesRead = 0; - Iterator Y3Iter = server.openIterator("Y3"); - while (Y3Iter.hasNext()) { - Tuple t = Y3Iter.next(); - assertEquals(t.size(), 2); - assertTrue(t.get(0).getClass() == String.class); - assertEquals(t.get(0), basicInputData.get(numTuplesRead).second); - assertTrue(t.get(1).getClass() == Integer.class); - assertEquals(t.get(1), basicInputData.get(numTuplesRead).first); - numTuplesRead++; - } - assertEquals(basicInputData.size(), numTuplesRead); - } - - public void testGetInputBytes() throws Exception { - File file = new File(TEST_WAREHOUSE_DIR + "/" + SPECIFIC_SIZE_TABLE + "/part-m-00000"); - file.deleteOnExit(); - RandomAccessFile randomAccessFile = new RandomAccessFile(file, "rw"); - randomAccessFile.setLength(2L * 1024 * 1024 * 1024); - - Job job = new Job(); - HCatLoader hCatLoader = new HCatLoader(); - hCatLoader.setUDFContextSignature(this.getName()); - 
hCatLoader.setLocation(SPECIFIC_SIZE_TABLE, job); - ResourceStatistics statistics = hCatLoader.getStatistics(file.getAbsolutePath(), job); - assertEquals(2048, (long) statistics.getmBytes()); - } - - public void testConvertBooleanToInt() throws Exception { - String tbl = "test_convert_boolean_to_int"; - String inputFileName = TEST_DATA_DIR + "/testConvertBooleanToInt/data.txt"; - File inputDataDir = new File(inputFileName).getParentFile(); - inputDataDir.mkdir(); - - String[] lines = new String[]{"llama\ttrue", "alpaca\tfalse"}; - HcatTestUtils.createTestDataFile(inputFileName, lines); - - assertEquals(0, driver.run("drop table if exists " + tbl).getResponseCode()); - assertEquals(0, driver.run("create external table " + tbl + - " (a string, b boolean) row format delimited fields terminated by '\t'" + - " stored as textfile location 'file://" + - inputDataDir.getPath().replaceAll("\\\\", "/") + "'").getResponseCode()); - - Properties properties = new Properties(); - properties.setProperty(HCatConstants.HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER, "true"); - PigServer server = new PigServer(ExecType.LOCAL, properties); - server.registerQuery( - "data = load 'test_convert_boolean_to_int' using org.apache.hcatalog.pig.HCatLoader();"); - Schema schema = server.dumpSchema("data"); - assertEquals(2, schema.getFields().size()); - - assertEquals("a", schema.getField(0).alias); - assertEquals(DataType.CHARARRAY, schema.getField(0).type); - assertEquals("b", schema.getField(1).alias); - if (PigHCatUtil.pigHasBooleanSupport()){ - assertEquals(DataType.BOOLEAN, schema.getField(1).type); - } else { - assertEquals(DataType.INTEGER, schema.getField(1).type); - } - - Iterator iterator = server.openIterator("data"); - Tuple t = iterator.next(); - assertEquals("llama", t.get(0)); - assertEquals(1, t.get(1)); - t = iterator.next(); - assertEquals("alpaca", t.get(0)); - assertEquals(0, t.get(1)); - assertFalse(iterator.hasNext()); - } -} diff --git 
hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatLoaderComplexSchema.java hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatLoaderComplexSchema.java deleted file mode 100644 index 1be55dd..0000000 --- hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatLoaderComplexSchema.java +++ /dev/null @@ -1,304 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.hcatalog.pig; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; - -import junit.framework.Assert; - -import org.apache.hadoop.hive.cli.CliSessionState; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.CommandNeedRetryException; -import org.apache.hadoop.hive.ql.Driver; -import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; -import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.pig.ExecType; -import org.apache.pig.PigServer; -import org.apache.pig.backend.executionengine.ExecException; -import org.apache.pig.backend.executionengine.ExecJob; -import org.apache.pig.data.BagFactory; -import org.apache.pig.data.DataBag; -import org.apache.pig.data.Tuple; -import org.apache.pig.data.TupleFactory; -import org.apache.pig.impl.logicalLayer.FrontendException; -import org.apache.pig.impl.logicalLayer.schema.Schema; -import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema; -import org.junit.BeforeClass; -import org.junit.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TestHCatLoaderComplexSchema { - - //private static MiniCluster cluster = MiniCluster.buildCluster(); - private static Driver driver; - //private static Properties props; - private static final Logger LOG = LoggerFactory.getLogger(TestHCatLoaderComplexSchema.class); - - private void dropTable(String tablename) throws IOException, CommandNeedRetryException { - driver.run("drop table " + tablename); - } - - protected String storageFormat() { - return "RCFILE tblproperties('hcat.isd'='org.apache.hcatalog.rcfile.RCFileInputDriver'," + - "'hcat.osd'='org.apache.hcatalog.rcfile.RCFileOutputDriver')"; - } - - private void createTable(String tablename, String schema, String partitionedBy) throws IOException, CommandNeedRetryException { - String createTable; - createTable = 
"create table " + tablename + "(" + schema + ") "; - if ((partitionedBy != null) && (!partitionedBy.trim().isEmpty())) { - createTable = createTable + "partitioned by (" + partitionedBy + ") "; - } - createTable = createTable + "stored as " + storageFormat(); - LOG.info("Creating table:\n {}", createTable); - CommandProcessorResponse result = driver.run(createTable); - int retCode = result.getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table. [" + createTable + "], return code from hive driver : [" + retCode + " " + result.getErrorMessage() + "]"); - } - } - - private void createTable(String tablename, String schema) throws IOException, CommandNeedRetryException { - createTable(tablename, schema, null); - } - - @BeforeClass - public static void setUpBeforeClass() throws Exception { - - HiveConf hiveConf = new HiveConf(TestHCatLoaderComplexSchema.class); - hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); - hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); - hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); - driver = new Driver(hiveConf); - SessionState.start(new CliSessionState(hiveConf)); - //props = new Properties(); - //props.setProperty("fs.default.name", cluster.getProperties().getProperty("fs.default.name")); - - } - - private static final TupleFactory tf = TupleFactory.getInstance(); - private static final BagFactory bf = BagFactory.getInstance(); - - private Tuple t(Object... objects) { - return tf.newTuple(Arrays.asList(objects)); - } - - private DataBag b(Tuple... 
objects) { - return bf.newDefaultBag(Arrays.asList(objects)); - } - - /** - * artificially complex nested schema to test nested schema conversion - * @throws Exception - */ - @Test - public void testSyntheticComplexSchema() throws Exception { - String pigSchema = - "a: " + - "(" + - "aa: chararray, " + - "ab: long, " + - "ac: map[], " + - "ad: { t: (ada: long) }, " + - "ae: { t: (aea:long, aeb: ( aeba: chararray, aebb: long)) }," + - "af: (afa: chararray, afb: long) " + - ")," + - "b: chararray, " + - "c: long, " + - "d: { t: (da:long, db: ( dba: chararray, dbb: long), dc: { t: (dca: long) } ) } "; - - // with extra structs - String tableSchema = - "a struct<" + - "aa: string, " + - "ab: bigint, " + - "ac: map, " + - "ad: array>, " + - "ae: array>>," + - "af: struct " + - ">, " + - "b string, " + - "c bigint, " + - "d array, dc: array>>>"; - - // without extra structs - String tableSchema2 = - "a struct<" + - "aa: string, " + - "ab: bigint, " + - "ac: map, " + - "ad: array, " + - "ae: array>>," + - "af: struct " + - ">, " + - "b string, " + - "c bigint, " + - "d array, dc: array>>"; - - List data = new ArrayList(); - for (int i = 0; i < 10; i++) { - Tuple t = t( - t( - "aa test", - 2l, - new HashMap() { - { - put("ac test1", "test 1"); - put("ac test2", "test 2"); - } - }, - b(t(3l), t(4l)), - b(t(5l, t("aeba test", 6l))), - t("afa test", 7l) - ), - "b test", - (long) i, - b(t(8l, t("dba test", 9l), b(t(10l))))); - - data.add(t); - } - verifyWriteRead("testSyntheticComplexSchema", pigSchema, tableSchema, data, true); - verifyWriteRead("testSyntheticComplexSchema", pigSchema, tableSchema, data, false); - verifyWriteRead("testSyntheticComplexSchema2", pigSchema, tableSchema2, data, true); - verifyWriteRead("testSyntheticComplexSchema2", pigSchema, tableSchema2, data, false); - - } - - private void verifyWriteRead(String tablename, String pigSchema, String tableSchema, List data, boolean provideSchemaToStorer) - throws IOException, CommandNeedRetryException, 
ExecException, FrontendException { - MockLoader.setData(tablename + "Input", data); - try { - createTable(tablename, tableSchema); - PigServer server = new PigServer(ExecType.LOCAL); - server.setBatchOn(); - server.registerQuery("A = load '" + tablename + "Input' using org.apache.hcatalog.pig.MockLoader() AS (" + pigSchema + ");"); - Schema dumpedASchema = server.dumpSchema("A"); - server.registerQuery("STORE A into '" + tablename + "' using org.apache.hcatalog.pig.HCatStorer(" - + (provideSchemaToStorer ? "'', '" + pigSchema + "'" : "") - + ");"); - - ExecJob execJob = server.executeBatch().get(0); - if (!execJob.getStatistics().isSuccessful()) { - throw new RuntimeException("Import failed", execJob.getException()); - } - // test that schema was loaded correctly - server.registerQuery("X = load '" + tablename + "' using org.apache.hcatalog.pig.HCatLoader();"); - server.dumpSchema("X"); - Iterator it = server.openIterator("X"); - int i = 0; - while (it.hasNext()) { - Tuple input = data.get(i++); - Tuple output = it.next(); - Assert.assertEquals(input.toString(), output.toString()); - LOG.info("tuple : {} ", output); - } - Schema dumpedXSchema = server.dumpSchema("X"); - - Assert.assertEquals( - "expected " + dumpedASchema + " but was " + dumpedXSchema + " (ignoring field names)", - "", - compareIgnoreFiledNames(dumpedASchema, dumpedXSchema)); - - } finally { - dropTable(tablename); - } - } - - private String compareIgnoreFiledNames(Schema expected, Schema got) throws FrontendException { - if (expected == null || got == null) { - if (expected == got) { - return ""; - } else { - return "\nexpected " + expected + " got " + got; - } - } - if (expected.size() != got.size()) { - return "\nsize expected " + expected.size() + " (" + expected + ") got " + got.size() + " (" + got + ")"; - } - String message = ""; - for (int i = 0; i < expected.size(); i++) { - FieldSchema expectedField = expected.getField(i); - FieldSchema gotField = got.getField(i); - if (expectedField.type 
!= gotField.type) { - message += "\ntype expected " + expectedField.type + " (" + expectedField + ") got " + gotField.type + " (" + gotField + ")"; - } else { - message += compareIgnoreFiledNames(expectedField.schema, gotField.schema); - } - } - return message; - } - - /** - * tests that unnecessary tuples are drop while converting schema - * (Pig requires Tuples in Bags) - * @throws Exception - */ - @Test - public void testTupleInBagInTupleInBag() throws Exception { - String pigSchema = "a: { b : ( c: { d: (i : long) } ) }"; - - String tableSchema = "a array< array< bigint > >"; - - List data = new ArrayList(); - data.add(t(b(t(b(t(100l), t(101l))), t(b(t(110l)))))); - data.add(t(b(t(b(t(200l))), t(b(t(210l))), t(b(t(220l)))))); - data.add(t(b(t(b(t(300l), t(301l)))))); - data.add(t(b(t(b(t(400l))), t(b(t(410l), t(411l), t(412l)))))); - - - verifyWriteRead("TupleInBagInTupleInBag1", pigSchema, tableSchema, data, true); - verifyWriteRead("TupleInBagInTupleInBag2", pigSchema, tableSchema, data, false); - - // test that we don't drop the unnecessary tuple if the table has the corresponding Struct - String tableSchema2 = "a array< struct< c: array< struct< i: bigint > > > >"; - - verifyWriteRead("TupleInBagInTupleInBag3", pigSchema, tableSchema2, data, true); - verifyWriteRead("TupleInBagInTupleInBag4", pigSchema, tableSchema2, data, false); - - } - - @Test - public void testMapWithComplexData() throws Exception { - String pigSchema = "a: long, b: map[]"; - String tableSchema = "a bigint, b map>"; - - List data = new ArrayList(); - for (int i = 0; i < 10; i++) { - Tuple t = t( - (long) i, - new HashMap() { - { - put("b test 1", t(1l, "test 1")); - put("b test 2", t(2l, "test 2")); - } - }); - - data.add(t); - } - verifyWriteRead("testMapWithComplexData", pigSchema, tableSchema, data, true); - verifyWriteRead("testMapWithComplexData2", pigSchema, tableSchema, data, false); - - } -} diff --git 
hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatLoaderStorer.java hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatLoaderStorer.java deleted file mode 100644 index c65ad49..0000000 --- hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatLoaderStorer.java +++ /dev/null @@ -1,141 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.pig; - -import org.apache.hadoop.fs.FileUtil; -import org.apache.hcatalog.HcatTestUtils; -import org.apache.hcatalog.mapreduce.HCatBaseTest; -import org.apache.pig.ExecType; -import org.apache.pig.PigServer; -import org.apache.pig.backend.executionengine.ExecJob; -import org.apache.pig.data.DataType; -import org.apache.pig.data.Tuple; -import org.apache.pig.impl.logicalLayer.schema.Schema; -import org.junit.Assert; -import org.junit.Test; - -import java.io.File; -import java.util.Iterator; -import java.util.List; - -/** - * Test that require both HCatLoader and HCatStorer. For read or write only functionality, - * please consider @{link TestHCatLoader} or @{link TestHCatStorer}. 
- */ -public class TestHCatLoaderStorer extends HCatBaseTest { - - /** - * Ensure Pig can read/write tinyint/smallint columns. - */ - @Test - public void testSmallTinyInt() throws Exception { - - String readTblName = "test_small_tiny_int"; - File dataDir = new File(TEST_DATA_DIR + "/testSmallTinyIntData"); - File dataFile = new File(dataDir, "testSmallTinyInt.tsv"); - - String writeTblName = "test_small_tiny_int_write"; - File writeDataFile = new File(TEST_DATA_DIR, writeTblName + ".tsv"); - - FileUtil.fullyDelete(dataDir); // Might not exist - Assert.assertTrue(dataDir.mkdir()); - - HcatTestUtils.createTestDataFile(dataFile.getAbsolutePath(), new String[]{ - String.format("%d\t%d", Short.MIN_VALUE, Byte.MIN_VALUE), - String.format("%d\t%d", Short.MAX_VALUE, Byte.MAX_VALUE) - }); - - // Create a table with smallint/tinyint columns, load data, and query from Hive. - Assert.assertEquals(0, driver.run("drop table if exists " + readTblName).getResponseCode()); - Assert.assertEquals(0, driver.run("create external table " + readTblName + - " (my_small_int smallint, my_tiny_int tinyint)" + - " row format delimited fields terminated by '\t' stored as textfile").getResponseCode()); - Assert.assertEquals(0, driver.run("load data local inpath '" + - dataDir.getPath().replaceAll("\\\\", "/") + "' into table " + readTblName).getResponseCode()); - - PigServer server = new PigServer(ExecType.LOCAL); - server.registerQuery( - "data = load '" + readTblName + "' using org.apache.hcatalog.pig.HCatLoader();"); - - // Ensure Pig schema is correct. - Schema schema = server.dumpSchema("data"); - Assert.assertEquals(2, schema.getFields().size()); - Assert.assertEquals("my_small_int", schema.getField(0).alias); - Assert.assertEquals(DataType.INTEGER, schema.getField(0).type); - Assert.assertEquals("my_tiny_int", schema.getField(1).alias); - Assert.assertEquals(DataType.INTEGER, schema.getField(1).type); - - // Ensure Pig can read data correctly. 
- Iterator it = server.openIterator("data"); - Tuple t = it.next(); - Assert.assertEquals(new Integer(Short.MIN_VALUE), t.get(0)); - Assert.assertEquals(new Integer(Byte.MIN_VALUE), t.get(1)); - t = it.next(); - Assert.assertEquals(new Integer(Short.MAX_VALUE), t.get(0)); - Assert.assertEquals(new Integer(Byte.MAX_VALUE), t.get(1)); - Assert.assertFalse(it.hasNext()); - - // Ensure Pig can write correctly to smallint/tinyint columns. This means values within the - // bounds of the column type are written, and values outside throw an exception. - Assert.assertEquals(0, driver.run("drop table if exists " + writeTblName).getResponseCode()); - Assert.assertEquals(0, driver.run("create table " + writeTblName + - " (my_small_int smallint, my_tiny_int tinyint) stored as rcfile").getResponseCode()); - - // Values within the column type bounds. - HcatTestUtils.createTestDataFile(writeDataFile.getAbsolutePath(), new String[]{ - String.format("%d\t%d", Short.MIN_VALUE, Byte.MIN_VALUE), - String.format("%d\t%d", Short.MAX_VALUE, Byte.MAX_VALUE) - }); - smallTinyIntBoundsCheckHelper(writeDataFile.getPath().replaceAll("\\\\", "/"), ExecJob.JOB_STATUS.COMPLETED); - - // Values outside the column type bounds will fail at runtime. 
- HcatTestUtils.createTestDataFile(TEST_DATA_DIR + "/shortTooSmall.tsv", new String[]{ - String.format("%d\t%d", Short.MIN_VALUE - 1, 0)}); - smallTinyIntBoundsCheckHelper(TEST_DATA_DIR + "/shortTooSmall.tsv", ExecJob.JOB_STATUS.FAILED); - - HcatTestUtils.createTestDataFile(TEST_DATA_DIR + "/shortTooBig.tsv", new String[]{ - String.format("%d\t%d", Short.MAX_VALUE + 1, 0)}); - smallTinyIntBoundsCheckHelper(TEST_DATA_DIR + "/shortTooBig.tsv", ExecJob.JOB_STATUS.FAILED); - - HcatTestUtils.createTestDataFile(TEST_DATA_DIR + "/byteTooSmall.tsv", new String[]{ - String.format("%d\t%d", 0, Byte.MIN_VALUE - 1)}); - smallTinyIntBoundsCheckHelper(TEST_DATA_DIR + "/byteTooSmall.tsv", ExecJob.JOB_STATUS.FAILED); - - HcatTestUtils.createTestDataFile(TEST_DATA_DIR + "/byteTooBig.tsv", new String[]{ - String.format("%d\t%d", 0, Byte.MAX_VALUE + 1)}); - smallTinyIntBoundsCheckHelper(TEST_DATA_DIR + "/byteTooBig.tsv", ExecJob.JOB_STATUS.FAILED); - } - - private void smallTinyIntBoundsCheckHelper(String data, ExecJob.JOB_STATUS expectedStatus) - throws Exception { - Assert.assertEquals(0, driver.run("drop table if exists test_tbl").getResponseCode()); - Assert.assertEquals(0, driver.run("create table test_tbl" + - " (my_small_int smallint, my_tiny_int tinyint) stored as rcfile").getResponseCode()); - - PigServer server = new PigServer(ExecType.LOCAL); - server.setBatchOn(); - server.registerQuery("data = load '" + data + - "' using PigStorage('\t') as (my_small_int:int, my_tiny_int:int);"); - server.registerQuery( - "store data into 'test_tbl' using org.apache.hcatalog.pig.HCatStorer();"); - List jobs = server.executeBatch(); - Assert.assertEquals(expectedStatus, jobs.get(0).getStatus()); - } -} diff --git hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatStorer.java hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatStorer.java deleted file mode 100644 index 6074821..0000000 --- 
hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatStorer.java +++ /dev/null @@ -1,658 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.pig; - -import java.io.File; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Iterator; - -import org.apache.hadoop.hive.ql.CommandNeedRetryException; -import org.apache.hcatalog.HcatTestUtils; -import org.apache.hcatalog.mapreduce.HCatBaseTest; -import org.apache.pig.EvalFunc; -import org.apache.pig.ExecType; -import org.apache.pig.PigException; -import org.apache.pig.PigServer; -import org.apache.pig.data.DataByteArray; -import org.apache.pig.data.Tuple; -import org.apache.pig.impl.logicalLayer.FrontendException; -import org.apache.pig.impl.util.LogUtils; -import org.junit.Assert; -import org.junit.Test; - -public class TestHCatStorer extends HCatBaseTest { - - private static final String INPUT_FILE_NAME = TEST_DATA_DIR + "/input.data"; - - @Test - public void testPartColsInData() throws IOException, CommandNeedRetryException { - - driver.run("drop table junit_unparted"); - String createTable = "create table junit_unparted(a int) partitioned by (b string) 
stored as RCFILE"; - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table."); - } - int LOOP_SIZE = 11; - String[] input = new String[LOOP_SIZE]; - for (int i = 0; i < LOOP_SIZE; i++) { - input[i] = i + "\t1"; - } - HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); - PigServer server = new PigServer(ExecType.LOCAL); - server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); - server.registerQuery("store A into 'default.junit_unparted' using " + HCatStorer.class.getName() + "('b=1');"); - server.registerQuery("B = load 'default.junit_unparted' using " + HCatLoader.class.getName() + "();"); - Iterator itr = server.openIterator("B"); - - int i = 0; - - while (itr.hasNext()) { - Tuple t = itr.next(); - Assert.assertEquals(2, t.size()); - Assert.assertEquals(t.get(0), i); - Assert.assertEquals(t.get(1), "1"); - i++; - } - - Assert.assertFalse(itr.hasNext()); - Assert.assertEquals(11, i); - } - - @Test - public void testMultiPartColsInData() throws IOException, CommandNeedRetryException { - - driver.run("drop table employee"); - String createTable = "CREATE TABLE employee (emp_id INT, emp_name STRING, emp_start_date STRING , emp_gender STRING ) " + - " PARTITIONED BY (emp_country STRING , emp_state STRING ) STORED AS RCFILE"; - - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table."); - } - - String[] inputData = {"111237\tKrishna\t01/01/1990\tM\tIN\tTN", - "111238\tKalpana\t01/01/2000\tF\tIN\tKA", - "111239\tSatya\t01/01/2001\tM\tIN\tKL", - "111240\tKavya\t01/01/2002\tF\tIN\tAP"}; - - HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, inputData); - PigServer pig = new PigServer(ExecType.LOCAL); - pig.setBatchOn(); - pig.registerQuery("A = LOAD '" + INPUT_FILE_NAME + "' USING PigStorage() AS (emp_id:int,emp_name:chararray,emp_start_date:chararray," + - 
"emp_gender:chararray,emp_country:chararray,emp_state:chararray);"); - pig.registerQuery("TN = FILTER A BY emp_state == 'TN';"); - pig.registerQuery("KA = FILTER A BY emp_state == 'KA';"); - pig.registerQuery("KL = FILTER A BY emp_state == 'KL';"); - pig.registerQuery("AP = FILTER A BY emp_state == 'AP';"); - pig.registerQuery("STORE TN INTO 'employee' USING " + HCatStorer.class.getName() + "('emp_country=IN,emp_state=TN');"); - pig.registerQuery("STORE KA INTO 'employee' USING " + HCatStorer.class.getName() + "('emp_country=IN,emp_state=KA');"); - pig.registerQuery("STORE KL INTO 'employee' USING " + HCatStorer.class.getName() + "('emp_country=IN,emp_state=KL');"); - pig.registerQuery("STORE AP INTO 'employee' USING " + HCatStorer.class.getName() + "('emp_country=IN,emp_state=AP');"); - pig.executeBatch(); - driver.run("select * from employee"); - ArrayList results = new ArrayList(); - driver.getResults(results); - Assert.assertEquals(4, results.size()); - Collections.sort(results); - Assert.assertEquals(inputData[0], results.get(0)); - Assert.assertEquals(inputData[1], results.get(1)); - Assert.assertEquals(inputData[2], results.get(2)); - Assert.assertEquals(inputData[3], results.get(3)); - driver.run("drop table employee"); - } - - @Test - public void testStoreInPartiitonedTbl() throws IOException, CommandNeedRetryException { - - driver.run("drop table junit_unparted"); - String createTable = "create table junit_unparted(a int) partitioned by (b string) stored as RCFILE"; - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table."); - } - int LOOP_SIZE = 11; - String[] input = new String[LOOP_SIZE]; - for (int i = 0; i < LOOP_SIZE; i++) { - input[i] = i + ""; - } - HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); - PigServer server = new PigServer(ExecType.LOCAL); - server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int);"); - server.registerQuery("store A into 
'default.junit_unparted' using " + HCatStorer.class.getName() + "('b=1');"); - server.registerQuery("B = load 'default.junit_unparted' using " + HCatLoader.class.getName() + "();"); - Iterator itr = server.openIterator("B"); - - int i = 0; - - while (itr.hasNext()) { - Tuple t = itr.next(); - Assert.assertEquals(2, t.size()); - Assert.assertEquals(t.get(0), i); - Assert.assertEquals(t.get(1), "1"); - i++; - } - - Assert.assertFalse(itr.hasNext()); - Assert.assertEquals(11, i); - } - - @Test - public void testNoAlias() throws IOException, CommandNeedRetryException { - driver.run("drop table junit_parted"); - String createTable = "create table junit_parted(a int, b string) partitioned by (ds string) stored as RCFILE"; - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table."); - } - PigServer server = new PigServer(ExecType.LOCAL); - boolean errCaught = false; - try { - server.setBatchOn(); - server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); - server.registerQuery("B = foreach A generate a+10, b;"); - server.registerQuery("store B into 'junit_parted' using " + HCatStorer.class.getName() + "('ds=20100101');"); - server.executeBatch(); - } catch (PigException fe) { - PigException pe = LogUtils.getPigException(fe); - Assert.assertTrue(pe instanceof FrontendException); - Assert.assertEquals(PigHCatUtil.PIG_EXCEPTION_CODE, pe.getErrorCode()); - Assert.assertTrue(pe.getMessage().contains("Column name for a field is not specified. 
Please provide the full schema as an argument to HCatStorer.")); - errCaught = true; - } - Assert.assertTrue(errCaught); - errCaught = false; - try { - server.setBatchOn(); - server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, B:chararray);"); - server.registerQuery("B = foreach A generate a, B;"); - server.registerQuery("store B into 'junit_parted' using " + HCatStorer.class.getName() + "('ds=20100101');"); - server.executeBatch(); - } catch (PigException fe) { - PigException pe = LogUtils.getPigException(fe); - Assert.assertTrue(pe instanceof FrontendException); - Assert.assertEquals(PigHCatUtil.PIG_EXCEPTION_CODE, pe.getErrorCode()); - Assert.assertTrue(pe.getMessage().contains("Column names should all be in lowercase. Invalid name found: B")); - errCaught = true; - } - driver.run("drop table junit_parted"); - Assert.assertTrue(errCaught); - } - - @Test - public void testStoreMultiTables() throws IOException, CommandNeedRetryException { - - driver.run("drop table junit_unparted"); - String createTable = "create table junit_unparted(a int, b string) stored as RCFILE"; - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table."); - } - driver.run("drop table junit_unparted2"); - createTable = "create table junit_unparted2(a int, b string) stored as RCFILE"; - retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table."); - } - - int LOOP_SIZE = 3; - String[] input = new String[LOOP_SIZE * LOOP_SIZE]; - int k = 0; - for (int i = 1; i <= LOOP_SIZE; i++) { - String si = i + ""; - for (int j = 1; j <= LOOP_SIZE; j++) { - input[k++] = si + "\t" + j; - } - } - HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); - PigServer server = new PigServer(ExecType.LOCAL); - server.setBatchOn(); - server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); - server.registerQuery("B = filter A by a < 
2;"); - server.registerQuery("store B into 'junit_unparted' using " + HCatStorer.class.getName() + "();"); - server.registerQuery("C = filter A by a >= 2;"); - server.registerQuery("store C into 'junit_unparted2' using " + HCatStorer.class.getName() + "();"); - server.executeBatch(); - - driver.run("select * from junit_unparted"); - ArrayList res = new ArrayList(); - driver.getResults(res); - driver.run("select * from junit_unparted2"); - ArrayList res2 = new ArrayList(); - driver.getResults(res2); - - res.addAll(res2); - driver.run("drop table junit_unparted"); - driver.run("drop table junit_unparted2"); - - Iterator itr = res.iterator(); - for (int i = 0; i < LOOP_SIZE * LOOP_SIZE; i++) { - Assert.assertEquals(input[i], itr.next()); - } - - Assert.assertFalse(itr.hasNext()); - - } - - @Test - public void testStoreWithNoSchema() throws IOException, CommandNeedRetryException { - - driver.run("drop table junit_unparted"); - String createTable = "create table junit_unparted(a int, b string) stored as RCFILE"; - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table."); - } - - int LOOP_SIZE = 3; - String[] input = new String[LOOP_SIZE * LOOP_SIZE]; - int k = 0; - for (int i = 1; i <= LOOP_SIZE; i++) { - String si = i + ""; - for (int j = 1; j <= LOOP_SIZE; j++) { - input[k++] = si + "\t" + j; - } - } - HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); - PigServer server = new PigServer(ExecType.LOCAL); - server.setBatchOn(); - server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); - server.registerQuery("store A into 'default.junit_unparted' using " + HCatStorer.class.getName() + "('');"); - server.executeBatch(); - - driver.run("select * from junit_unparted"); - ArrayList res = new ArrayList(); - driver.getResults(res); - driver.run("drop table junit_unparted"); - Iterator itr = res.iterator(); - for (int i = 0; i < LOOP_SIZE * LOOP_SIZE; i++) { - 
Assert.assertEquals(input[i], itr.next()); - } - - Assert.assertFalse(itr.hasNext()); - - } - - @Test - public void testStoreWithNoCtorArgs() throws IOException, CommandNeedRetryException { - - driver.run("drop table junit_unparted"); - String createTable = "create table junit_unparted(a int, b string) stored as RCFILE"; - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table."); - } - - int LOOP_SIZE = 3; - String[] input = new String[LOOP_SIZE * LOOP_SIZE]; - int k = 0; - for (int i = 1; i <= LOOP_SIZE; i++) { - String si = i + ""; - for (int j = 1; j <= LOOP_SIZE; j++) { - input[k++] = si + "\t" + j; - } - } - HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); - PigServer server = new PigServer(ExecType.LOCAL); - server.setBatchOn(); - server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); - server.registerQuery("store A into 'junit_unparted' using " + HCatStorer.class.getName() + "();"); - server.executeBatch(); - - driver.run("select * from junit_unparted"); - ArrayList res = new ArrayList(); - driver.getResults(res); - driver.run("drop table junit_unparted"); - Iterator itr = res.iterator(); - for (int i = 0; i < LOOP_SIZE * LOOP_SIZE; i++) { - Assert.assertEquals(input[i], itr.next()); - } - - Assert.assertFalse(itr.hasNext()); - - } - - @Test - public void testEmptyStore() throws IOException, CommandNeedRetryException { - - driver.run("drop table junit_unparted"); - String createTable = "create table junit_unparted(a int, b string) stored as RCFILE"; - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table."); - } - - int LOOP_SIZE = 3; - String[] input = new String[LOOP_SIZE * LOOP_SIZE]; - int k = 0; - for (int i = 1; i <= LOOP_SIZE; i++) { - String si = i + ""; - for (int j = 1; j <= LOOP_SIZE; j++) { - input[k++] = si + "\t" + j; - } - } - 
HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); - PigServer server = new PigServer(ExecType.LOCAL); - server.setBatchOn(); - server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); - server.registerQuery("B = filter A by a > 100;"); - server.registerQuery("store B into 'default.junit_unparted' using " + HCatStorer.class.getName() + "('','a:int,b:chararray');"); - server.executeBatch(); - - driver.run("select * from junit_unparted"); - ArrayList res = new ArrayList(); - driver.getResults(res); - driver.run("drop table junit_unparted"); - Iterator itr = res.iterator(); - Assert.assertFalse(itr.hasNext()); - - } - - @Test - public void testBagNStruct() throws IOException, CommandNeedRetryException { - driver.run("drop table junit_unparted"); - String createTable = "create table junit_unparted(b string,a struct, arr_of_struct array, " + - "arr_of_struct2 array>, arr_of_struct3 array>) stored as RCFILE"; - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table."); - } - - String[] inputData = new String[]{"zookeeper\t(2)\t{(pig)}\t{(pnuts,hdfs)}\t{(hadoop),(hcat)}", - "chubby\t(2)\t{(sawzall)}\t{(bigtable,gfs)}\t{(mapreduce),(hcat)}"}; - - HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, inputData); - - PigServer server = new PigServer(ExecType.LOCAL); - server.setBatchOn(); - server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (b:chararray, a:tuple(a1:int), arr_of_struct:bag{mytup:tuple(s1:chararray)}, arr_of_struct2:bag{mytup:tuple(s1:chararray,s2:chararray)}, arr_of_struct3:bag{t3:tuple(s3:chararray)});"); - server.registerQuery("store A into 'default.junit_unparted' using " + HCatStorer.class.getName() + "('','b:chararray, a:tuple(a1:int)," + - " arr_of_struct:bag{mytup:tuple(s1:chararray)}, arr_of_struct2:bag{mytup:tuple(s1:chararray,s2:chararray)}, arr_of_struct3:bag{t3:tuple(s3:chararray)}');"); - server.executeBatch(); - - driver.run("select * 
from junit_unparted"); - ArrayList res = new ArrayList(); - driver.getResults(res); - driver.run("drop table junit_unparted"); - Iterator itr = res.iterator(); - Assert.assertEquals("zookeeper\t{\"a1\":2}\t[\"pig\"]\t[{\"s1\":\"pnuts\",\"s2\":\"hdfs\"}]\t[{\"s3\":\"hadoop\"},{\"s3\":\"hcat\"}]", itr.next()); - Assert.assertEquals("chubby\t{\"a1\":2}\t[\"sawzall\"]\t[{\"s1\":\"bigtable\",\"s2\":\"gfs\"}]\t[{\"s3\":\"mapreduce\"},{\"s3\":\"hcat\"}]", itr.next()); - Assert.assertFalse(itr.hasNext()); - - } - - @Test - public void testStoreFuncAllSimpleTypes() throws IOException, CommandNeedRetryException { - - driver.run("drop table junit_unparted"); - String createTable = "create table junit_unparted(a int, b float, c double, d bigint, e string, h boolean, f binary, g binary) stored as RCFILE"; - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table."); - } - - int i = 0; - String[] input = new String[3]; - input[i++] = "0\t\t\t\t\t\t\t"; //Empty values except first column - input[i++] = "\t" + i * 2.1f + "\t" + i * 1.1d + "\t" + i * 2L + "\t" + "lets hcat" + "\t" + "true" + "\tbinary-data"; //First column empty - input[i++] = i + "\t" + i * 2.1f + "\t" + i * 1.1d + "\t" + i * 2L + "\t" + "lets hcat" + "\t" + "false" + "\tbinary-data"; - - HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); - PigServer server = new PigServer(ExecType.LOCAL); - server.setBatchOn(); - server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:float, c:double, d:long, e:chararray, h:boolean, f:bytearray);"); - //null gets stored into column g which is a binary field. 
- server.registerQuery("store A into 'default.junit_unparted' using " + HCatStorer.class.getName() + "('','a:int, b:float, c:double, d:long, e:chararray, h:boolean, f:bytearray');"); - server.executeBatch(); - - - driver.run("select * from junit_unparted"); - ArrayList res = new ArrayList(); - driver.getResults(res); - - Iterator itr = res.iterator(); - String next = itr.next(); - Assert.assertEquals("0\tNULL\tNULL\tNULL\tNULL\tNULL\tNULL\tNULL", next ); - Assert.assertEquals("NULL\t4.2\t2.2\t4\tlets hcat\ttrue\tbinary-data\tNULL", itr.next()); - Assert.assertEquals("3\t6.2999997\t3.3000000000000003\t6\tlets hcat\tfalse\tbinary-data\tNULL", itr.next()); - Assert.assertFalse(itr.hasNext()); - - server.registerQuery("B = load 'junit_unparted' using " + HCatLoader.class.getName() + ";"); - Iterator iter = server.openIterator("B"); - int count = 0; - int num5nulls = 0; - while (iter.hasNext()) { - Tuple t = iter.next(); - if (t.get(6) == null) { - num5nulls++; - } else { - Assert.assertTrue(t.get(6) instanceof DataByteArray); - } - Assert.assertNull(t.get(7)); - count++; - } - Assert.assertEquals(3, count); - Assert.assertEquals(1, num5nulls); - driver.run("drop table junit_unparted"); - } - - @Test - public void testStoreFuncSimple() throws IOException, CommandNeedRetryException { - - driver.run("drop table junit_unparted"); - String createTable = "create table junit_unparted(a int, b string) stored as RCFILE"; - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table."); - } - - int LOOP_SIZE = 3; - String[] inputData = new String[LOOP_SIZE * LOOP_SIZE]; - int k = 0; - for (int i = 1; i <= LOOP_SIZE; i++) { - String si = i + ""; - for (int j = 1; j <= LOOP_SIZE; j++) { - inputData[k++] = si + "\t" + j; - } - } - HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, inputData); - PigServer server = new PigServer(ExecType.LOCAL); - server.setBatchOn(); - server.registerQuery("A = load '" + 
INPUT_FILE_NAME + "' as (a:int, b:chararray);"); - server.registerQuery("store A into 'default.junit_unparted' using " + HCatStorer.class.getName() + "('','a:int,b:chararray');"); - server.executeBatch(); - - driver.run("select * from junit_unparted"); - ArrayList res = new ArrayList(); - driver.getResults(res); - driver.run("drop table junit_unparted"); - Iterator itr = res.iterator(); - for (int i = 1; i <= LOOP_SIZE; i++) { - String si = i + ""; - for (int j = 1; j <= LOOP_SIZE; j++) { - Assert.assertEquals(si + "\t" + j, itr.next()); - } - } - Assert.assertFalse(itr.hasNext()); - - } - - @Test - public void testDynamicPartitioningMultiPartColsInDataPartialSpec() throws IOException, CommandNeedRetryException { - - driver.run("drop table if exists employee"); - String createTable = "CREATE TABLE employee (emp_id INT, emp_name STRING, emp_start_date STRING , emp_gender STRING ) " + - " PARTITIONED BY (emp_country STRING , emp_state STRING ) STORED AS RCFILE"; - - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table."); - } - - String[] inputData = {"111237\tKrishna\t01/01/1990\tM\tIN\tTN", - "111238\tKalpana\t01/01/2000\tF\tIN\tKA", - "111239\tSatya\t01/01/2001\tM\tIN\tKL", - "111240\tKavya\t01/01/2002\tF\tIN\tAP"}; - - HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, inputData); - PigServer pig = new PigServer(ExecType.LOCAL); - pig.setBatchOn(); - pig.registerQuery("A = LOAD '" + INPUT_FILE_NAME + "' USING PigStorage() AS (emp_id:int,emp_name:chararray,emp_start_date:chararray," + - "emp_gender:chararray,emp_country:chararray,emp_state:chararray);"); - pig.registerQuery("IN = FILTER A BY emp_country == 'IN';"); - pig.registerQuery("STORE IN INTO 'employee' USING " + HCatStorer.class.getName() + "('emp_country=IN');"); - pig.executeBatch(); - driver.run("select * from employee"); - ArrayList results = new ArrayList(); - driver.getResults(results); - Assert.assertEquals(4, results.size()); 
- Collections.sort(results); - Assert.assertEquals(inputData[0], results.get(0)); - Assert.assertEquals(inputData[1], results.get(1)); - Assert.assertEquals(inputData[2], results.get(2)); - Assert.assertEquals(inputData[3], results.get(3)); - driver.run("drop table employee"); - } - - @Test - public void testDynamicPartitioningMultiPartColsInDataNoSpec() throws IOException, CommandNeedRetryException { - - driver.run("drop table if exists employee"); - String createTable = "CREATE TABLE employee (emp_id INT, emp_name STRING, emp_start_date STRING , emp_gender STRING ) " + - " PARTITIONED BY (emp_country STRING , emp_state STRING ) STORED AS RCFILE"; - - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table."); - } - - String[] inputData = {"111237\tKrishna\t01/01/1990\tM\tIN\tTN", - "111238\tKalpana\t01/01/2000\tF\tIN\tKA", - "111239\tSatya\t01/01/2001\tM\tIN\tKL", - "111240\tKavya\t01/01/2002\tF\tIN\tAP"}; - - HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, inputData); - PigServer pig = new PigServer(ExecType.LOCAL); - pig.setBatchOn(); - pig.registerQuery("A = LOAD '" + INPUT_FILE_NAME + "' USING PigStorage() AS (emp_id:int,emp_name:chararray,emp_start_date:chararray," + - "emp_gender:chararray,emp_country:chararray,emp_state:chararray);"); - pig.registerQuery("IN = FILTER A BY emp_country == 'IN';"); - pig.registerQuery("STORE IN INTO 'employee' USING " + HCatStorer.class.getName() + "();"); - pig.executeBatch(); - driver.run("select * from employee"); - ArrayList results = new ArrayList(); - driver.getResults(results); - Assert.assertEquals(4, results.size()); - Collections.sort(results); - Assert.assertEquals(inputData[0], results.get(0)); - Assert.assertEquals(inputData[1], results.get(1)); - Assert.assertEquals(inputData[2], results.get(2)); - Assert.assertEquals(inputData[3], results.get(3)); - driver.run("drop table employee"); - } - - @Test - public void 
testDynamicPartitioningMultiPartColsNoDataInDataNoSpec() throws IOException, CommandNeedRetryException { - - driver.run("drop table if exists employee"); - String createTable = "CREATE TABLE employee (emp_id INT, emp_name STRING, emp_start_date STRING , emp_gender STRING ) " + - " PARTITIONED BY (emp_country STRING , emp_state STRING ) STORED AS RCFILE"; - - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table."); - } - - String[] inputData = {}; - HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, inputData); - - PigServer pig = new PigServer(ExecType.LOCAL); - pig.setBatchOn(); - pig.registerQuery("A = LOAD '" + INPUT_FILE_NAME + "' USING PigStorage() AS (emp_id:int,emp_name:chararray,emp_start_date:chararray," + - "emp_gender:chararray,emp_country:chararray,emp_state:chararray);"); - pig.registerQuery("IN = FILTER A BY emp_country == 'IN';"); - pig.registerQuery("STORE IN INTO 'employee' USING " + HCatStorer.class.getName() + "();"); - pig.executeBatch(); - driver.run("select * from employee"); - ArrayList results = new ArrayList(); - driver.getResults(results); - Assert.assertEquals(0, results.size()); - driver.run("drop table employee"); - } - - public void testPartitionPublish() - throws IOException, CommandNeedRetryException { - - driver.run("drop table ptn_fail"); - String createTable = "create table ptn_fail(a int, c string) partitioned by (b string) stored as RCFILE"; - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table."); - } - int LOOP_SIZE = 11; - String[] input = new String[LOOP_SIZE]; - - for (int i = 0; i < LOOP_SIZE; i++) { - input[i] = i + "\tmath"; - } - HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); - PigServer server = new PigServer(ExecType.LOCAL); - server.setBatchOn(); - server.registerQuery("A = load '" + INPUT_FILE_NAME - + "' as (a:int, c:chararray);"); - server.registerQuery("B = 
filter A by " + FailEvalFunc.class.getName() - + "($0);"); - server.registerQuery("store B into 'ptn_fail' using " - + HCatStorer.class.getName() + "('b=math');"); - server.executeBatch(); - - String query = "show partitions ptn_fail"; - retCode = driver.run(query).getResponseCode(); - - if (retCode != 0) { - throw new IOException("Error " + retCode + " running query " - + query); - } - - ArrayList res = new ArrayList(); - driver.getResults(res); - Assert.assertEquals(0, res.size()); - - // Make sure the partitions directory is not in hdfs. - Assert.assertTrue((new File(TEST_WAREHOUSE_DIR + "/ptn_fail")).exists()); - Assert.assertFalse((new File(TEST_WAREHOUSE_DIR + "/ptn_fail/b=math")) - .exists()); - } - - static public class FailEvalFunc extends EvalFunc { - - /* - * @param Tuple /* @return null /* @throws IOException - * - * @see org.apache.pig.EvalFunc#exec(org.apache.pig.data.Tuple) - */ - @Override - public Boolean exec(Tuple tuple) throws IOException { - throw new IOException("Eval Func to mimic Failure."); - } - - } -} diff --git hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatStorerMulti.java hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatStorerMulti.java deleted file mode 100644 index 2660fc4..0000000 --- hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatStorerMulti.java +++ /dev/null @@ -1,201 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.pig; - -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Map; - -import junit.framework.TestCase; - -import org.apache.hadoop.fs.FileUtil; -import org.apache.hadoop.hive.cli.CliSessionState; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.CommandNeedRetryException; -import org.apache.hadoop.hive.ql.Driver; -import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hcatalog.data.Pair; -import org.apache.pig.ExecType; -import org.apache.pig.PigServer; - -public class TestHCatStorerMulti extends TestCase { - private static final String TEST_DATA_DIR = - "/tmp/build/test/data/" + TestHCatStorerMulti.class.getCanonicalName(); - private static final String TEST_WAREHOUSE_DIR = TEST_DATA_DIR + "/warehouse"; - private static final String INPUT_FILE_NAME = TEST_DATA_DIR + "/input.data"; - - private static final String BASIC_TABLE = "junit_unparted_basic"; - private static final String PARTITIONED_TABLE = "junit_parted_basic"; - private static Driver driver; - - private static Map> basicInputData; - - protected String storageFormat() { - return "RCFILE tblproperties('hcat.isd'='org.apache.hcatalog.rcfile.RCFileInputDriver'," + - "'hcat.osd'='org.apache.hcatalog.rcfile.RCFileOutputDriver')"; - } - - private void dropTable(String tablename) throws IOException, CommandNeedRetryException { - driver.run("drop table " + tablename); - } - - private void createTable(String tablename, 
String schema, String partitionedBy) throws IOException, CommandNeedRetryException { - String createTable; - createTable = "create table " + tablename + "(" + schema + ") "; - if ((partitionedBy != null) && (!partitionedBy.trim().isEmpty())) { - createTable = createTable + "partitioned by (" + partitionedBy + ") "; - } - createTable = createTable + "stored as " + storageFormat(); - int retCode = driver.run(createTable).getResponseCode(); - if (retCode != 0) { - throw new IOException("Failed to create table. [" + createTable + "], return code from hive driver : [" + retCode + "]"); - } - } - - private void createTable(String tablename, String schema) throws IOException, CommandNeedRetryException { - createTable(tablename, schema, null); - } - - @Override - protected void setUp() throws Exception { - if (driver == null) { - HiveConf hiveConf = new HiveConf(this.getClass()); - hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); - hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); - hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); - hiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, TEST_WAREHOUSE_DIR); - driver = new Driver(hiveConf); - SessionState.start(new CliSessionState(hiveConf)); - } - - cleanup(); - } - - @Override - protected void tearDown() throws Exception { - cleanup(); - } - - public void testStoreBasicTable() throws Exception { - - - createTable(BASIC_TABLE, "a int, b string"); - - populateBasicFile(); - - PigServer server = new PigServer(ExecType.LOCAL); - server.setBatchOn(); - server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); - server.registerQuery("store A into '" + BASIC_TABLE + "' using org.apache.hcatalog.pig.HCatStorer();"); - - server.executeBatch(); - - driver.run("select * from " + BASIC_TABLE); - ArrayList unpartitionedTableValuesReadFromHiveDriver = new ArrayList(); - driver.getResults(unpartitionedTableValuesReadFromHiveDriver); - 
assertEquals(basicInputData.size(), unpartitionedTableValuesReadFromHiveDriver.size()); - } - - public void testStorePartitionedTable() throws Exception { - createTable(PARTITIONED_TABLE, "a int, b string", "bkt string"); - - populateBasicFile(); - - PigServer server = new PigServer(ExecType.LOCAL); - server.setBatchOn(); - server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); - - server.registerQuery("B2 = filter A by a < 2;"); - server.registerQuery("store B2 into '" + PARTITIONED_TABLE + "' using org.apache.hcatalog.pig.HCatStorer('bkt=0');"); - server.registerQuery("C2 = filter A by a >= 2;"); - server.registerQuery("store C2 into '" + PARTITIONED_TABLE + "' using org.apache.hcatalog.pig.HCatStorer('bkt=1');"); - - server.executeBatch(); - - driver.run("select * from " + PARTITIONED_TABLE); - ArrayList partitionedTableValuesReadFromHiveDriver = new ArrayList(); - driver.getResults(partitionedTableValuesReadFromHiveDriver); - assertEquals(basicInputData.size(), partitionedTableValuesReadFromHiveDriver.size()); - } - - public void testStoreTableMulti() throws Exception { - - - createTable(BASIC_TABLE, "a int, b string"); - createTable(PARTITIONED_TABLE, "a int, b string", "bkt string"); - - populateBasicFile(); - - PigServer server = new PigServer(ExecType.LOCAL); - server.setBatchOn(); - server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); - server.registerQuery("store A into '" + BASIC_TABLE + "' using org.apache.hcatalog.pig.HCatStorer();"); - - server.registerQuery("B2 = filter A by a < 2;"); - server.registerQuery("store B2 into '" + PARTITIONED_TABLE + "' using org.apache.hcatalog.pig.HCatStorer('bkt=0');"); - server.registerQuery("C2 = filter A by a >= 2;"); - server.registerQuery("store C2 into '" + PARTITIONED_TABLE + "' using org.apache.hcatalog.pig.HCatStorer('bkt=1');"); - - server.executeBatch(); - - driver.run("select * from " + BASIC_TABLE); - ArrayList 
unpartitionedTableValuesReadFromHiveDriver = new ArrayList(); - driver.getResults(unpartitionedTableValuesReadFromHiveDriver); - driver.run("select * from " + PARTITIONED_TABLE); - ArrayList partitionedTableValuesReadFromHiveDriver = new ArrayList(); - driver.getResults(partitionedTableValuesReadFromHiveDriver); - assertEquals(basicInputData.size(), unpartitionedTableValuesReadFromHiveDriver.size()); - assertEquals(basicInputData.size(), partitionedTableValuesReadFromHiveDriver.size()); - } - - private void populateBasicFile() throws IOException { - int LOOP_SIZE = 3; - String[] input = new String[LOOP_SIZE * LOOP_SIZE]; - basicInputData = new HashMap>(); - int k = 0; - File file = new File(INPUT_FILE_NAME); - file.deleteOnExit(); - FileWriter writer = new FileWriter(file); - for (int i = 1; i <= LOOP_SIZE; i++) { - String si = i + ""; - for (int j = 1; j <= LOOP_SIZE; j++) { - String sj = "S" + j + "S"; - input[k] = si + "\t" + sj; - basicInputData.put(k, new Pair(i, sj)); - writer.write(input[k] + "\n"); - k++; - } - } - writer.close(); - } - - private void cleanup() throws IOException, CommandNeedRetryException { - File f = new File(TEST_WAREHOUSE_DIR); - if (f.exists()) { - FileUtil.fullyDelete(f); - } - new File(TEST_WAREHOUSE_DIR).mkdirs(); - - dropTable(BASIC_TABLE); - dropTable(PARTITIONED_TABLE); - } -} diff --git hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatStorerWrapper.java hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatStorerWrapper.java deleted file mode 100644 index a6680d2..0000000 --- hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestHCatStorerWrapper.java +++ /dev/null @@ -1,92 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.pig; - -import java.io.File; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.UUID; - -import org.apache.hadoop.hive.ql.CommandNeedRetryException; -import org.apache.hcatalog.HcatTestUtils; -import org.apache.hcatalog.mapreduce.HCatBaseTest; -import org.apache.pig.ExecType; -import org.apache.pig.PigServer; -import org.junit.Assert; -import org.junit.Test; - -/** - * This test checks the {@link HCatConstants#HCAT_PIG_STORER_EXTERNAL_LOCATION} that we can set in the - * UDFContext of {@link HCatStorer} so that it writes to the specified external location. - * - * Since {@link HCatStorer} does not allow extra parameters in the constructor, we use {@link HCatStorerWrapper} - * that always treats the last parameter as the external path. 
- */ -public class TestHCatStorerWrapper extends HCatBaseTest { - - private static final String INPUT_FILE_NAME = TEST_DATA_DIR + "/input.data"; - - @Test - public void testStoreExternalTableWithExternalDir() throws IOException, CommandNeedRetryException{ - - File tmpExternalDir = new File(TEST_DATA_DIR, UUID.randomUUID().toString()); - tmpExternalDir.deleteOnExit(); - - String part_val = "100"; - - driver.run("drop table junit_external"); - String createTable = "create external table junit_external(a int, b string) partitioned by (c string) stored as RCFILE"; - Assert.assertEquals(0, driver.run(createTable).getResponseCode()); - - int LOOP_SIZE = 3; - String[] inputData = new String[LOOP_SIZE*LOOP_SIZE]; - int k = 0; - for(int i = 1; i <= LOOP_SIZE; i++) { - String si = i + ""; - for(int j=1;j<=LOOP_SIZE;j++) { - inputData[k++] = si + "\t"+j; - } - } - HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, inputData); - PigServer server = new PigServer(ExecType.LOCAL); - server.setBatchOn(); - logAndRegister(server, "A = load '"+INPUT_FILE_NAME+"' as (a:int, b:chararray);"); - logAndRegister(server, "store A into 'default.junit_external' using " + HCatStorerWrapper.class.getName() - + "('c=" + part_val + "','" + tmpExternalDir.getPath().replaceAll("\\\\", "/") + "');"); - server.executeBatch(); - - Assert.assertTrue(tmpExternalDir.exists()); - Assert.assertTrue(new File(tmpExternalDir.getPath().replaceAll("\\\\", "/") + "/" + "part-m-00000").exists()); - - driver.run("select * from junit_external"); - ArrayList res = new ArrayList(); - driver.getResults(res); - driver.run("drop table junit_external"); - Iterator itr = res.iterator(); - for(int i = 1; i <= LOOP_SIZE; i++) { - String si = i + ""; - for(int j=1;j<=LOOP_SIZE;j++) { - Assert.assertEquals( si + "\t" + j + "\t" + part_val,itr.next()); - } - } - Assert.assertFalse(itr.hasNext()); - - } -} diff --git hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestOrcHCatLoader.java 
hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestOrcHCatLoader.java deleted file mode 100644 index a251a75..0000000 --- hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestOrcHCatLoader.java +++ /dev/null @@ -1,29 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.pig; - -public class TestOrcHCatLoader extends TestHCatLoader { - - @Override - protected String storageFormat() { - return "orc"; - } - -} - diff --git hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestOrcHCatLoaderComplexSchema.java hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestOrcHCatLoaderComplexSchema.java deleted file mode 100644 index 7d91364..0000000 --- hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestOrcHCatLoaderComplexSchema.java +++ /dev/null @@ -1,28 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.pig; - -public class TestOrcHCatLoaderComplexSchema extends TestHCatLoaderComplexSchema { - - @Override - protected String storageFormat() { - return "orc"; - } - -} diff --git hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestOrcHCatStorer.java hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestOrcHCatStorer.java deleted file mode 100644 index ca703ce..0000000 --- hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestOrcHCatStorer.java +++ /dev/null @@ -1,28 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.hcatalog.pig; - -public class TestOrcHCatStorer extends TestHCatStorerMulti { - - @Override - protected String storageFormat() { - return "orc"; - } -} - diff --git hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestPermsInheritance.java.broken hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestPermsInheritance.java.broken deleted file mode 100644 index fce1e70..0000000 --- hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestPermsInheritance.java.broken +++ /dev/null @@ -1,135 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - --->There are two pieces of code that sets directory permissions. --->One that sets the UMask which only woks for dfs filesystem. --->And the other change the permission of directories after they are created. --->I removed that since it is not secure and just add more load on the namenode. --->We should push this test to e2e to verify what actually runs in production. 
- -package org.apache.hcatalog.pig; - -import java.io.IOException; - -import junit.framework.TestCase; - -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.PathFilter; -import org.apache.hadoop.fs.permission.FsPermission; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; -import org.apache.hadoop.hive.metastore.MetaStoreUtils; -import org.apache.hadoop.hive.metastore.Warehouse; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; -import org.apache.hadoop.hive.metastore.api.UnknownTableException; -import org.apache.hadoop.hive.ql.metadata.Hive; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hcatalog.ExitException; -import org.apache.hcatalog.NoExitSecurityManager; -import org.apache.hcatalog.cli.HCatCli; -import org.apache.hcatalog.pig.HCatStorer; -import org.apache.pig.ExecType; -import org.apache.pig.PigServer; -import org.apache.pig.impl.util.UDFContext; -import org.apache.thrift.TException; - -public class TestPermsInheritance extends TestCase { - - @Override - protected void setUp() throws Exception { - super.setUp(); - securityManager = System.getSecurityManager(); - System.setSecurityManager(new NoExitSecurityManager()); - msc = new HiveMetaStoreClient(conf); - msc.dropTable(MetaStoreUtils.DEFAULT_DATABASE_NAME,"testNoPartTbl", true,true); - System.setProperty(HiveConf.ConfVars.PREEXECHOOKS.varname, " "); - System.setProperty(HiveConf.ConfVars.POSTEXECHOOKS.varname, " "); - msc.dropTable(MetaStoreUtils.DEFAULT_DATABASE_NAME,"testPartTbl", true,true); - pig = new PigServer(ExecType.LOCAL, conf.getAllProperties()); - UDFContext.getUDFContext().setClientSystemProps(); - } - - private HiveMetaStoreClient msc; - private SecurityManager securityManager; - private PigServer pig; - - @Override - protected void 
tearDown() throws Exception { - super.tearDown(); - System.setSecurityManager(securityManager); - } - - private final HiveConf conf = new HiveConf(this.getClass()); - - public void testNoPartTbl() throws IOException, MetaException, UnknownTableException, TException, NoSuchObjectException, HiveException{ - - try{ - HCatCli.main(new String[]{"-e","create table testNoPartTbl (line string) stored as RCFILE", "-p","rwx-wx---"}); - } - catch(Exception e){ - assertTrue(e instanceof ExitException); - assertEquals(((ExitException)e).getStatus(), 0); - } - Warehouse wh = new Warehouse(conf); - Path dfsPath = wh.getTablePath(Hive.get(conf).getDatabase(MetaStoreUtils.DEFAULT_DATABASE_NAME), "testNoPartTbl"); - FileSystem fs = dfsPath.getFileSystem(conf); - assertEquals(fs.getFileStatus(dfsPath).getPermission(),FsPermission.valueOf("drwx-wx---")); - - pig.setBatchOn(); - pig.registerQuery("A = load 'build.xml' as (line:chararray);"); - pig.registerQuery("store A into 'testNoPartTbl' using "+HCatStorer.class.getName()+"();"); - pig.executeBatch(); - FileStatus[] status = fs.listStatus(dfsPath,hiddenFileFilter); - - assertEquals(status.length, 1); - assertEquals(FsPermission.valueOf("drwx-wx---"),status[0].getPermission()); - - try{ - HCatCli.main(new String[]{"-e","create table testPartTbl (line string) partitioned by (a string) stored as RCFILE", "-p","rwx-wx--x"}); - } - catch(Exception e){ - assertTrue(e instanceof ExitException); - assertEquals(((ExitException)e).getStatus(), 0); - } - - dfsPath = wh.getTablePath(Hive.get(conf).getDatabase(MetaStoreUtils.DEFAULT_DATABASE_NAME), "testPartTbl"); - assertEquals(fs.getFileStatus(dfsPath).getPermission(),FsPermission.valueOf("drwx-wx--x")); - - pig.setBatchOn(); - pig.registerQuery("A = load 'build.xml' as (line:chararray);"); - pig.registerQuery("store A into 'testPartTbl' using "+HCatStorer.class.getName()+"('a=part');"); - pig.executeBatch(); - - Path partPath = new Path(dfsPath,"a=part"); - 
assertEquals(FsPermission.valueOf("drwx-wx--x"),fs.getFileStatus(partPath).getPermission()); - status = fs.listStatus(partPath,hiddenFileFilter); - assertEquals(status.length, 1); - assertEquals(FsPermission.valueOf("drwx-wx--x"),status[0].getPermission()); - } - - private static final PathFilter hiddenFileFilter = new PathFilter(){ - public boolean accept(Path p){ - String name = p.getName(); - return !name.startsWith("_") && !name.startsWith("."); - } - }; -} diff --git hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestPigHCatUtil.java hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestPigHCatUtil.java deleted file mode 100644 index 108034a..0000000 --- hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestPigHCatUtil.java +++ /dev/null @@ -1,95 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.pig; - -import com.google.common.collect.Lists; -import junit.framework.Assert; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.data.schema.HCatFieldSchema; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.apache.pig.ResourceSchema; -import org.apache.pig.ResourceSchema.ResourceFieldSchema; -import org.apache.pig.data.DataType; -import org.apache.pig.impl.util.UDFContext; -import org.junit.Test; - -public class TestPigHCatUtil { - - @Test - public void testGetBagSubSchema() throws Exception { - - // Define the expected schema. - ResourceFieldSchema[] bagSubFieldSchemas = new ResourceFieldSchema[1]; - bagSubFieldSchemas[0] = new ResourceFieldSchema().setName("innertuple") - .setDescription("The tuple in the bag").setType(DataType.TUPLE); - - ResourceFieldSchema[] innerTupleFieldSchemas = new ResourceFieldSchema[1]; - innerTupleFieldSchemas[0] = - new ResourceFieldSchema().setName("innerfield").setType(DataType.CHARARRAY); - - bagSubFieldSchemas[0].setSchema(new ResourceSchema().setFields(innerTupleFieldSchemas)); - ResourceSchema expected = new ResourceSchema().setFields(bagSubFieldSchemas); - - // Get the actual converted schema. - HCatSchema hCatSchema = new HCatSchema(Lists.newArrayList( - new HCatFieldSchema("innerLlama", HCatFieldSchema.Type.STRING, null))); - HCatFieldSchema hCatFieldSchema = - new HCatFieldSchema("llama", HCatFieldSchema.Type.ARRAY, hCatSchema, null); - ResourceSchema actual = PigHCatUtil.getBagSubSchema(hCatFieldSchema); - - Assert.assertEquals(expected.toString(), actual.toString()); - } - - @Test - public void testGetBagSubSchemaConfigured() throws Exception { - - // NOTE: pig-0.8 sets client system properties by actually getting the client - // system properties. Starting in pig-0.9 you must pass the properties in. - // When updating our pig dependency this will need updated. 
- System.setProperty(HCatConstants.HCAT_PIG_INNER_TUPLE_NAME, "t"); - System.setProperty(HCatConstants.HCAT_PIG_INNER_FIELD_NAME, "FIELDNAME_tuple"); - UDFContext.getUDFContext().setClientSystemProps(System.getProperties()); - - // Define the expected schema. - ResourceFieldSchema[] bagSubFieldSchemas = new ResourceFieldSchema[1]; - bagSubFieldSchemas[0] = new ResourceFieldSchema().setName("t") - .setDescription("The tuple in the bag").setType(DataType.TUPLE); - - ResourceFieldSchema[] innerTupleFieldSchemas = new ResourceFieldSchema[1]; - innerTupleFieldSchemas[0] = - new ResourceFieldSchema().setName("llama_tuple").setType(DataType.CHARARRAY); - - bagSubFieldSchemas[0].setSchema(new ResourceSchema().setFields(innerTupleFieldSchemas)); - ResourceSchema expected = new ResourceSchema().setFields(bagSubFieldSchemas); - - // Get the actual converted schema. - HCatSchema actualHCatSchema = new HCatSchema(Lists.newArrayList( - new HCatFieldSchema("innerLlama", HCatFieldSchema.Type.STRING, null))); - HCatFieldSchema actualHCatFieldSchema = - new HCatFieldSchema("llama", HCatFieldSchema.Type.ARRAY, actualHCatSchema, null); - ResourceSchema actual = PigHCatUtil.getBagSubSchema(actualHCatFieldSchema); - - Assert.assertEquals(expected.toString(), actual.toString()); - - // Clean up System properties that were set by this test - System.clearProperty(HCatConstants.HCAT_PIG_INNER_TUPLE_NAME); - System.clearProperty(HCatConstants.HCAT_PIG_INNER_FIELD_NAME); - } -} diff --git hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestPigStorageDriver.java.broken hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestPigStorageDriver.java.broken deleted file mode 100644 index fdf3a98..0000000 --- hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hcatalog/pig/TestPigStorageDriver.java.broken +++ /dev/null @@ -1,272 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.pig; - -import java.io.BufferedInputStream; -import java.io.DataInputStream; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileWriter; -import java.io.IOException; -import java.io.PrintWriter; -import java.util.Iterator; -import java.util.Map; - -import junit.framework.TestCase; - -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.cli.CliSessionState; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; -import org.apache.hadoop.hive.metastore.MetaStoreUtils; -import org.apache.hadoop.hive.metastore.api.InvalidOperationException; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; -import org.apache.hadoop.hive.metastore.api.Partition; -import org.apache.hadoop.hive.metastore.api.UnknownTableException; -import org.apache.hadoop.hive.ql.CommandNeedRetryException; -import org.apache.hadoop.hive.ql.Driver; -import org.apache.hadoop.hive.ql.io.RCFileInputFormat; -import org.apache.hadoop.hive.ql.io.RCFileOutputFormat; -import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; -import 
org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzer; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.pig.HCatLoader; -import org.apache.pig.ExecType; -import org.apache.pig.PigServer; -import org.apache.pig.data.Tuple; -import org.apache.pig.impl.logicalLayer.FrontendException; -import org.apache.pig.impl.util.UDFContext; -import org.apache.thrift.TException; - -public class TestPigStorageDriver extends TestCase { - - private HiveConf hcatConf; - private Driver hcatDriver; - private HiveMetaStoreClient msc; - private static String tblLocation = "/tmp/test_pig/data"; - private static String anyExistingFileInCurDir = "ivy.xml"; - private static String warehouseDir = "/tmp/hcat_junit_warehouse"; - - @Override - protected void setUp() throws Exception { - - hcatConf = new HiveConf(this.getClass()); - hcatConf.set(ConfVars.PREEXECHOOKS.varname, ""); - hcatConf.set(ConfVars.POSTEXECHOOKS.varname, ""); - hcatConf.set(ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); - hcatConf.set(ConfVars.SEMANTIC_ANALYZER_HOOK.varname, HCatSemanticAnalyzer.class.getName()); - hcatDriver = new Driver(hcatConf); - msc = new HiveMetaStoreClient(hcatConf); - SessionState.start(new CliSessionState(hcatConf)); - super.setUp(); - } - - @Override - protected void tearDown() throws Exception { - super.tearDown(); - } - - public void testPigStorageDriver() throws IOException, CommandNeedRetryException{ - - String fsLoc = hcatConf.get("fs.default.name"); - Path tblPath = new Path(fsLoc, tblLocation); - String tblName = "junit_pigstorage"; - tblPath.getFileSystem(hcatConf).copyFromLocalFile(new Path(anyExistingFileInCurDir),tblPath); - - hcatDriver.run("drop table " + tblName); - CommandProcessorResponse resp; - String createTable = "create table " + tblName + " (a string) partitioned by (b string) stored as TEXTFILE"; - - resp = hcatDriver.run(createTable); - assertEquals(0, resp.getResponseCode()); 
- assertNull(resp.getErrorMessage()); - - resp = hcatDriver.run("alter table " + tblName + " add partition (b='2010-10-10') location '"+new Path(fsLoc, "/tmp/test_pig")+"'"); - assertEquals(0, resp.getResponseCode()); - assertNull(resp.getErrorMessage()); - - resp = hcatDriver.run("alter table " + tblName + " partition (b='2010-10-10') set fileformat TEXTFILE"); - assertEquals(0, resp.getResponseCode()); - assertNull(resp.getErrorMessage()); - - resp = hcatDriver.run("desc extended " + tblName + " partition (b='2010-10-10')"); - assertEquals(0, resp.getResponseCode()); - assertNull(resp.getErrorMessage()); - - PigServer server = new PigServer(ExecType.LOCAL, hcatConf.getAllProperties()); - UDFContext.getUDFContext().setClientSystemProps(); - server.registerQuery(" a = load '" + tblName + "' using "+HCatLoader.class.getName()+";"); - Iterator itr = server.openIterator("a"); - boolean result = compareWithFile(itr, anyExistingFileInCurDir, 2, "2010-10-10", null); - assertTrue(result); - - server.registerQuery("a = load '"+tblPath.toString()+"' using PigStorage() as (a:chararray);"); - server.store("a", tblName, HCatStorer.class.getName() + "('b=2010-10-11')"); - - server.registerQuery("a = load '" + warehouseDir + "/" + tblName + "/b=2010-10-11' using PigStorage() as (a:chararray);"); - itr = server.openIterator("a"); - result = compareWithFile(itr, anyExistingFileInCurDir, 1, "2010-10-11", null); - assertTrue(result); - - // Test multi-store - server.registerQuery("a = load '"+tblPath.toString()+"' using PigStorage() as (a:chararray);"); - server.registerQuery("store a into '" + tblName + "' using " + HCatStorer.class.getName() + "('b=2010-11-01');"); - server.registerQuery("store a into '" + tblName + "' using " + HCatStorer.class.getName() + "('b=2010-11-02');"); - - server.registerQuery("a = load '" + warehouseDir + "/" + tblName + "/b=2010-11-01' using PigStorage() as (a:chararray);"); - itr = server.openIterator("a"); - result = compareWithFile(itr, 
anyExistingFileInCurDir, 1, "2010-11-01", null); - assertTrue(result); - - server.registerQuery("a = load '" + warehouseDir + "/" + tblName + "/b=2010-11-02' using PigStorage() as (a:chararray);"); - itr = server.openIterator("a"); - result = compareWithFile(itr, anyExistingFileInCurDir, 1, "2010-11-02", null); - assertTrue(result); - - hcatDriver.run("drop table " + tblName); - } - - private boolean compareWithFile(Iterator itr, String factFile, int numColumn, String key, String valueSuffix) throws IOException { - DataInputStream stream = new DataInputStream(new BufferedInputStream(new FileInputStream(new File(factFile)))); - while(itr.hasNext()){ - Tuple t = itr.next(); - assertEquals(numColumn, t.size()); - if(t.get(0) != null) { - // If underlying data-field is empty. PigStorage inserts null instead - // of empty String objects. - assertTrue(t.get(0) instanceof String); - String expected = stream.readLine(); - if (valueSuffix!=null) - expected += valueSuffix; - assertEquals(expected, t.get(0)); - } - else{ - assertTrue(stream.readLine().isEmpty()); - } - - if (numColumn>1) { - // The second column must be key - assertTrue(t.get(1) instanceof String); - assertEquals(key, t.get(1)); - } - } - assertEquals(0,stream.available()); - stream.close(); - return true; - } - - public void testDelim() throws MetaException, TException, UnknownTableException, NoSuchObjectException, InvalidOperationException, IOException, CommandNeedRetryException{ - - hcatDriver.run("drop table junit_pigstorage_delim"); - - CommandProcessorResponse resp; - String createTable = "create table junit_pigstorage_delim (a0 string, a1 string) partitioned by (b string) stored as RCFILE"; - - resp = hcatDriver.run(createTable); - - assertEquals(0, resp.getResponseCode()); - assertNull(resp.getErrorMessage()); - - resp = hcatDriver.run("alter table junit_pigstorage_delim add partition (b='2010-10-10')"); - assertEquals(0, resp.getResponseCode()); - assertNull(resp.getErrorMessage()); - - resp = 
hcatDriver.run("alter table junit_pigstorage_delim partition (b='2010-10-10') set fileformat TEXTFILE"); - - Partition part = msc.getPartition(MetaStoreUtils.DEFAULT_DATABASE_NAME, "junit_pigstorage_delim", "b=2010-10-10"); - Map partParms = part.getParameters(); - partParms.put(HCatConstants.HCAT_PIG_LOADER_ARGS, "control-A"); - partParms.put(HCatConstants.HCAT_PIG_STORER_ARGS, "control-A"); - - msc.alter_partition(MetaStoreUtils.DEFAULT_DATABASE_NAME, "junit_pigstorage_delim", part); - - PigServer server = new PigServer(ExecType.LOCAL, hcatConf.getAllProperties()); - UDFContext.getUDFContext().setClientSystemProps(); - server.registerQuery(" a = load 'junit_pigstorage_delim' using "+HCatLoader.class.getName()+";"); - try{ - server.openIterator("a"); - }catch(FrontendException fe){} - - resp = hcatDriver.run("alter table junit_pigstorage_delim set fileformat TEXTFILE"); - assertEquals(0, resp.getResponseCode()); - assertNull(resp.getErrorMessage()); - resp = hcatDriver.run("alter table junit_pigstorage_delim set TBLPROPERTIES ('hcat.pig.loader.args'=':', 'hcat.pig.storer.args'=':')"); - assertEquals(0, resp.getResponseCode()); - assertNull(resp.getErrorMessage()); - - File inputFile = File.createTempFile("hcat_test", ""); - PrintWriter p = new PrintWriter(new FileWriter(inputFile)); - p.println("1\t2"); - p.println("3\t4"); - p.close(); - server.registerQuery("a = load '"+inputFile.toString()+"' using PigStorage() as (a0:chararray, a1:chararray);"); - server.store("a", "junit_pigstorage_delim", HCatStorer.class.getName() + "('b=2010-10-11')"); - - server.registerQuery("a = load '/tmp/hcat_junit_warehouse/junit_pigstorage_delim/b=2010-10-11' using PigStorage() as (a:chararray);"); - Iterator itr = server.openIterator("a"); - - assertTrue(itr.hasNext()); - Tuple t = itr.next(); - assertTrue(t.get(0).equals("1:2")); - - assertTrue(itr.hasNext()); - t = itr.next(); - assertTrue(t.get(0).equals("3:4")); - - assertFalse(itr.hasNext()); - inputFile.delete(); - } - - 
public void testMultiConstructArgs() throws MetaException, TException, UnknownTableException, NoSuchObjectException, InvalidOperationException, IOException, CommandNeedRetryException{ - - String fsLoc = hcatConf.get("fs.default.name"); - Path tblPath = new Path(fsLoc, tblLocation); - String tblName = "junit_pigstorage_constructs"; - tblPath.getFileSystem(hcatConf).copyFromLocalFile(new Path(anyExistingFileInCurDir),tblPath); - - hcatDriver.run("drop table junit_pigstorage_constructs"); - - CommandProcessorResponse resp; - String createTable = "create table " + tblName + " (a string) partitioned by (b string) stored as TEXTFILE"; - - resp = hcatDriver.run(createTable); - - assertEquals(0, resp.getResponseCode()); - assertNull(resp.getErrorMessage()); - - resp = hcatDriver.run("alter table " + tblName + " set TBLPROPERTIES ('hcat.pig.storer'='org.apache.hcatalog.pig.MyPigStorage', 'hcat.pig.storer.args'=':#hello', 'hcat.pig.args.delimiter'='#')"); - assertEquals(0, resp.getResponseCode()); - assertNull(resp.getErrorMessage()); - - PigServer server = new PigServer(ExecType.LOCAL, hcatConf.getAllProperties()); - UDFContext.getUDFContext().setClientSystemProps(); - - server.registerQuery("a = load '"+tblPath.toString()+"' using PigStorage() as (a:chararray);"); - server.store("a", tblName, HCatStorer.class.getName() + "('b=2010-10-11')"); - - server.registerQuery("a = load '" + warehouseDir + "/" + tblName + "/b=2010-10-11' using PigStorage() as (a:chararray);"); - Iterator itr = server.openIterator("a"); - boolean result = compareWithFile(itr, anyExistingFileInCurDir, 1, "2010-10-11", ":hello"); - assertTrue(result); - } -} diff --git hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/HCatStorerWrapper.java hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/HCatStorerWrapper.java new file mode 100644 index 0000000..a7dd185 --- /dev/null +++ 
hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/HCatStorerWrapper.java @@ -0,0 +1,72 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.pig; + +import java.io.IOException; +import java.util.Properties; + +import org.apache.hadoop.mapreduce.Job; +import org.apache.hive.hcatalog.common.HCatConstants; +import org.apache.pig.impl.util.UDFContext; + +/** + * This class is used to test the HCAT_PIG_STORER_EXTERNAL_LOCATION property used in HCatStorer. + * When this property is set, HCatStorer writes the output to the location it specifies. Since + * the property can only be set in the UDFContext, we need this simpler wrapper to do three things: + *
<ol> + * <li>save the external dir specified in the Pig script</li> + * <li>set the same UDFContext signature as HCatStorer</li> + * <li>before {@link HCatStorer#setStoreLocation(String, Job)}, set the external dir in the UDFContext.</li> + * </ol>
+ */
+public class HCatStorerWrapper extends HCatStorer {
+
+  private String sign; // UDFContext signature, captured in setStoreFuncUDFContextSignature
+  private String externalDir; // external output location supplied to the constructor
+
+  public HCatStorerWrapper(String partSpecs, String schema, String externalDir) throws Exception {
+    super(partSpecs, schema);
+    this.externalDir = externalDir;
+  }
+
+  public HCatStorerWrapper(String partSpecs, String externalDir) throws Exception {
+    super(partSpecs);
+    this.externalDir = externalDir;
+  }
+
+  public HCatStorerWrapper(String externalDir) throws Exception{
+    super();
+    this.externalDir = externalDir;
+  }
+
+  @Override
+  public void setStoreLocation(String location, Job job) throws IOException {
+    Properties udfProps = UDFContext.getUDFContext().getUDFProperties(
+      this.getClass(), new String[] { sign }); // NOTE(review): keyed by the wrapper class; presumably HCatStorer looks up via getClass() too - confirm
+    udfProps.setProperty(HCatConstants.HCAT_PIG_STORER_EXTERNAL_LOCATION, externalDir); // publish the external dir before delegating
+    super.setStoreLocation(location, job);
+  }
+
+  @Override
+  public void setStoreFuncUDFContextSignature(String signature) {
+    sign = signature; // remembered so setStoreLocation can address the same UDF properties
+    super.setStoreFuncUDFContextSignature(signature);
+  }
+}
diff --git hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/MockLoader.java hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/MockLoader.java
new file mode 100644
index 0000000..a368b01
--- /dev/null
+++ hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/MockLoader.java
@@ -0,0 +1,180 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hive.hcatalog.pig;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.pig.LoadFunc;
+import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigSplit;
+import org.apache.pig.data.Tuple;
+// LoadFunc that serves tuples registered in memory through setData(location, data).
+public class MockLoader extends LoadFunc {
+  private static final class MockRecordReader extends RecordReader { // placeholder reader: constant key, value and progress
+    @Override
+    public void close() throws IOException {
+    }
+
+    @Override
+    public Object getCurrentKey() throws IOException, InterruptedException {
+      return "mockKey";
+    }
+
+    @Override
+    public Object getCurrentValue() throws IOException, InterruptedException {
+      return "mockValue";
+    }
+
+    @Override
+    public float getProgress() throws IOException, InterruptedException {
+      return 0.5f;
+    }
+
+    @Override
+    public void initialize(InputSplit split, TaskAttemptContext arg1) throws IOException,
+      InterruptedException {
+    }
+
+    @Override
+    public boolean nextKeyValue() throws IOException, InterruptedException {
+      return true;
+    }
+  }
+  // Split that carries only its location string; (de)serialized through Writable.
+  private static final class MockInputSplit extends InputSplit implements Writable {
+    private String location;
+
+    public MockInputSplit() { // no-arg constructor leaves location null until readFields runs
+    }
+
+    public MockInputSplit(String location) {
+      this.location = location;
+    }
+
+    @Override
+    public String[] getLocations() throws IOException, InterruptedException {
+      return new String[]{location};
+    }
+
+    @Override
+    public long getLength() throws IOException, InterruptedException {
+      return 10000000;
+    }
+
+    @Override
+    public boolean equals(Object arg0) {
+      return arg0 == this;
+    }
+
+    @Override
+    public int hashCode() {
+      return location == null ? 0 : location.hashCode(); // location is null after the no-arg constructor
+    }
+
+    @Override
+    public void readFields(DataInput arg0) throws IOException {
+      location = arg0.readUTF();
+    }
+
+    @Override
+    public void write(DataOutput arg0) throws IOException {
+      arg0.writeUTF(location);
+    }
+  }
+  // InputFormat that yields one MockInputSplit for the configured location.
+  private static final class MockInputFormat extends InputFormat {
+
+    private final String location;
+
+    public MockInputFormat(String location) {
+      this.location = location;
+    }
+
+    @Override
+    public RecordReader createRecordReader(InputSplit arg0, TaskAttemptContext arg1)
+      throws IOException, InterruptedException {
+      return new MockRecordReader();
+    }
+
+    @Override
+    public List getSplits(JobContext arg0) throws IOException, InterruptedException {
+      return Arrays.asList(new MockInputSplit(location));
+    }
+  }
+  // Registry: location string -> tuples to serve; filled through setData.
+  private static final Map<String, Iterable<Tuple>> locationToData = new HashMap<String, Iterable<Tuple>>();
+
+  public static void setData(String location, Iterable<Tuple> data) {
+    locationToData.put(location, data);
+  }
+
+  private String location;
+
+  private Iterator<Tuple> data; // iterator over the tuples registered for the current location
+
+  @Override
+  public String relativeToAbsolutePath(String location, Path curDir) throws IOException {
+    return location; // locations are registry keys, so they are returned unchanged
+  }
+
+  @Override
+  public void setLocation(String location, Job job) throws IOException {
+    this.location = location;
+    if (location == null) {
+      throw new IOException("null location passed to MockLoader");
+    }
+    if (locationToData.get(location) == null) { // test the registry entry itself; iterator() never returns null
+      throw new IOException("No data configured for location: " + location);
+    }
+    this.data = locationToData.get(location).iterator();
+  }
+
+  @Override
+  public Tuple getNext() throws IOException {
+    if (data == null) {
+      throw new IOException("data was not correctly initialized in MockLoader");
+    }
+    return data.hasNext() ? data.next() : null; // null once the registered tuples are exhausted
+  }
+
+  @Override
+  public InputFormat getInputFormat() throws IOException {
+    return new MockInputFormat(location);
+  }
+
+  @Override
+  public void prepareToRead(RecordReader arg0, PigSplit arg1) throws IOException {
+  }
+
+}
diff --git hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/MyPigStorage.java hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/MyPigStorage.java
new file mode 100644
index 0000000..76d7561
--- /dev/null
+++ hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/MyPigStorage.java
@@ -0,0 +1,40 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hive.hcatalog.pig;
+
+import java.io.IOException;
+
+import org.apache.pig.builtin.PigStorage;
+import org.apache.pig.data.Tuple;
+
+public class MyPigStorage extends PigStorage { // PigStorage variant that adds one extra field to every stored tuple
+
+  String arg2; // value appended to each tuple in putNext
+
+  public MyPigStorage(String arg1, String arg2) throws IOException {
+    super(arg1); // arg1 is forwarded to PigStorage's single-argument constructor
+    this.arg2 = arg2;
+  }
+
+  @Override
+  public void putNext(Tuple t) throws IOException {
+    t.append(arg2); // modifies the tuple passed in by the caller before delegating
+    super.putNext(t);
+  }
+}
diff --git hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestE2EScenarios.java hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestE2EScenarios.java
new file mode 100644
index 0000000..2c7487c
--- /dev/null
+++ hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestE2EScenarios.java
@@ -0,0 +1,230 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hive.hcatalog.pig;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Iterator;
+
+import junit.framework.TestCase;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.hive.cli.CliSessionState;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.CommandNeedRetryException;
+import org.apache.hadoop.hive.ql.Driver;
+import org.apache.hadoop.hive.ql.session.SessionState;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.OutputCommitter;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.TaskAttemptID;
+import org.apache.hive.hcatalog.HcatTestUtils;
+import org.apache.hive.hcatalog.common.HCatConstants;
+import org.apache.hive.hcatalog.common.HCatContext;
+import org.apache.hive.hcatalog.data.HCatRecord;
+import org.apache.hive.hcatalog.data.schema.HCatSchema;
+import org.apache.hive.hcatalog.mapreduce.HCatInputFormat;
+import org.apache.hive.hcatalog.mapreduce.HCatOutputFormat;
+import org.apache.hive.hcatalog.mapreduce.OutputJobInfo;
+import org.apache.hive.hcatalog.mapreduce.HCatMapRedUtil;
+import org.apache.pig.ExecType;
+import org.apache.pig.PigServer;
+import org.apache.pig.data.Tuple;
+
+/**
+ * End-to-end scenario: loads a delimited text file into a Hive table, copies
+ * that table into an RCFile table and an ORC table by driving
+ * HCatInputFormat / HCatOutputFormat by hand, and finally reads all three
+ * tables back through Pig's HCatLoader.
+ */
+public class TestE2EScenarios extends TestCase {
+
+  // Keyed on this class (not TestHCatLoader) so that setUp(), which wipes the
+  // warehouse directory, cannot delete data belonging to another test class.
+  private static final String TEST_DATA_DIR = System.getProperty("user.dir") +
+    "/build/test/data/" + TestE2EScenarios.class.getCanonicalName();
+  private static final String TEST_WAREHOUSE_DIR = TEST_DATA_DIR + "/warehouse";
+
+  private static final String TEXTFILE_LOCN = TEST_DATA_DIR + "/textfile";
+
+  private static Driver driver;
+
+  /** Storage format name; not referenced by any code visible in this class. */
+  protected String storageFormat() {
+    return "orc";
+  }
+
+  /**
+   * Recreates an empty warehouse directory and starts a Hive driver/session
+   * configured to use it, with pre/post exec hooks and concurrency disabled.
+   */
+  @Override
+  protected void setUp() throws Exception {
+
+    File warehouseDir = new File(TEST_WAREHOUSE_DIR);
+    if (warehouseDir.exists()) {
+      FileUtil.fullyDelete(warehouseDir);
+    }
+    // Fail early with a clear message instead of letting later Hive calls
+    // fail obscurely when the warehouse directory is missing.
+    if (!warehouseDir.mkdirs() && !warehouseDir.isDirectory()) {
+      throw new IOException("Could not create warehouse directory " + TEST_WAREHOUSE_DIR);
+    }
+
+    HiveConf hiveConf = new HiveConf(this.getClass());
+    hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, "");
+    hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, "");
+    hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false");
+    hiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, TEST_WAREHOUSE_DIR);
+    driver = new Driver(hiveConf);
+    SessionState.start(new CliSessionState(hiveConf));
+
+  }
+
+  /** Drops the three tables created by {@link #testReadOrcAndRCFromPig()}. */
+  @Override
+  protected void tearDown() throws Exception {
+    dropTable("inpy");
+    dropTable("rc5318");
+    dropTable("orc5318");
+  }
+
+  /** Issues a "drop table"; the driver's response code is deliberately not checked. */
+  private void dropTable(String tablename) throws IOException, CommandNeedRetryException {
+    driver.run("drop table " + tablename);
+  }
+
+  /**
+   * Builds and runs a "create table" statement.
+   *
+   * @param tablename     name of the table to create
+   * @param schema        column list placed between the parentheses
+   * @param partitionedBy partition column list; skipped when null or blank
+   * @param storageFormat value for "stored as"; skipped when null
+   * @throws IOException if the Hive driver reports a non-zero response code
+   */
+  private void createTable(String tablename, String schema, String partitionedBy, String storageFormat) throws IOException, CommandNeedRetryException {
+    String createTable;
+    createTable = "create table " + tablename + "(" + schema + ") ";
+    if ((partitionedBy != null) && (!partitionedBy.trim().isEmpty())) {
+      createTable = createTable + "partitioned by (" + partitionedBy + ") ";
+    }
+    if (storageFormat != null){
+      createTable = createTable + "stored as " +storageFormat;
+    }
+    driverRun(createTable);
+  }
+
+  /** Runs cmd through the Hive driver and turns a non-zero response code into an IOException. */
+  private void driverRun(String cmd) throws IOException, CommandNeedRetryException {
+    int retCode = driver.run(cmd).getResponseCode();
+    if (retCode != 0) {
+      throw new IOException("Failed to run [" +
+        cmd + "], return code from hive driver : [" + retCode + "]");
+    }
+  }
+
+  /**
+   * Loads tableName through HCatLoader on a local-mode PigServer and prints
+   * every field of every tuple (class name and value) to stderr.
+   */
+  private void pigDump(String tableName) throws IOException {
+    PigServer server = new PigServer(ExecType.LOCAL);
+
+    System.err.println("===");
+    System.err.println(tableName+":");
+    server.registerQuery("X = load '" + tableName
+      + "' using org.apache.hive.hcatalog.pig.HCatLoader();");
+    Iterator<Tuple> XIter = server.openIterator("X");
+    while (XIter.hasNext()) {
+      Tuple t = XIter.next();
+      for (Object o : t.getAll()){
+        System.err.print(
+          "\t(" + o.getClass().getName() + ":"
+            + o.toString() + ")"
+        );
+      }
+      System.err.println("");
+    }
+    System.err.println("===");
+  }
+
+
+  /**
+   * Copies every record of table "in" into table "out" (both looked up with a
+   * null database name) by reading each input split with HCatInputFormat and
+   * writing it with HCatOutputFormat, performing the job/task setup and commit
+   * calls by hand instead of submitting a MapReduce job.
+   */
+  private void copyTable(String in, String out) throws IOException, InterruptedException {
+    Job ijob = new Job();
+    Job ojob = new Job();
+    HCatInputFormat inpy = new HCatInputFormat();
+    inpy.setInput(ijob , null, in);
+    HCatOutputFormat oupy = new HCatOutputFormat();
+    oupy.setOutput(ojob,
+      OutputJobInfo.create(null, out, new HashMap<String, String>()
+      ));
+
+    // Test HCatContext
+
+    System.err.println("HCatContext INSTANCE is present : " +HCatContext.INSTANCE.getConf().isPresent());
+    if (HCatContext.INSTANCE.getConf().isPresent()){
+      System.err.println("HCatContext tinyint->int promotion says " +
+        HCatContext.INSTANCE.getConf().get().getBoolean(
+          HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION,
+          HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION_DEFAULT));
+    }
+
+    HCatSchema tableSchema = inpy.getTableSchema(ijob.getConfiguration());
+    System.err.println("Copying from ["+in+"] to ["+out+"] with schema : "+ tableSchema.toString());
+    oupy.setSchema(ojob, tableSchema);
+    oupy.checkOutputSpecs(ojob);
+    OutputCommitter oc = oupy.getOutputCommitter(createTaskAttemptContext(ojob.getConfiguration()));
+    oc.setupJob(ojob);
+
+    for (InputSplit split : inpy.getSplits(ijob)){
+
+      TaskAttemptContext rtaskContext = createTaskAttemptContext(ijob.getConfiguration());
+      TaskAttemptContext wtaskContext = createTaskAttemptContext(ojob.getConfiguration());
+
+      RecordReader<WritableComparable, HCatRecord> rr = inpy.createRecordReader(split, rtaskContext);
+      rr.initialize(split, rtaskContext);
+
+      OutputCommitter taskOc = oupy.getOutputCommitter(wtaskContext);
+      taskOc.setupTask(wtaskContext);
+      RecordWriter<WritableComparable<?>, HCatRecord> rw = oupy.getRecordWriter(wtaskContext);
+
+      while(rr.nextKeyValue()){
+        rw.write(rr.getCurrentKey(), rr.getCurrentValue());
+      }
+      rw.close(wtaskContext);
+      taskOc.commitTask(wtaskContext);
+      rr.close();
+    }
+
+    oc.commitJob(ojob);
+  }
+
+  /**
+   * Creates a TaskAttemptContext for a default TaskAttemptID.
+   *
+   * @param tconf configuration to use; a fresh Configuration is created when
+   *              null. When non-null it is modified in place: the
+   *              "mapred.task.partition" and "mapred.task.id" keys are set.
+   */
+  private TaskAttemptContext createTaskAttemptContext(Configuration tconf) {
+    Configuration conf = (tconf == null) ? (new Configuration()) : tconf;
+    TaskAttemptID taskId = new TaskAttemptID();
+    conf.setInt("mapred.task.partition", taskId.getId());
+    conf.set("mapred.task.id", "attempt__0000_r_000000_" + taskId.getId());
+    TaskAttemptContext rtaskContext = HCatMapRedUtil.createTaskAttemptContext(conf , taskId);
+    return rtaskContext;
+  }
+
+
+  /**
+   * Creates text, RCFile and ORC tables with the same seven-column schema,
+   * loads two ^A-delimited rows into the text table, copies them to the other
+   * two tables and dumps all three through Pig. There are no assertions on
+   * the dumped values; the test passes when no step throws.
+   */
+  public void testReadOrcAndRCFromPig() throws Exception {
+    String tableSchema = "ti tinyint, si smallint,i int, bi bigint, f float, d double, b boolean";
+
+    HcatTestUtils.createTestDataFile(TEXTFILE_LOCN,
+      new String[]{
+        "-3\0019001\00186400\0014294967297\00134.532\0012184239842983489.1231231234\001true"
+        ,"0\0010\0010\0010\0010\0010\001false"
+      }
+    );
+
+    // write this out to a file, and import it into hive
+    createTable("inpy",tableSchema,null,"textfile");
+    createTable("rc5318",tableSchema,null,"rcfile");
+    createTable("orc5318",tableSchema,null,"orc");
+    driverRun("LOAD DATA LOCAL INPATH '"+TEXTFILE_LOCN+"' OVERWRITE INTO TABLE inpy");
+
+    // write it out from hive to an rcfile table, and to an orc table
+//    driverRun("insert overwrite table rc5318 select * from inpy");
+    copyTable("inpy","rc5318");
+//    driverRun("insert overwrite table orc5318 select * from inpy");
+    copyTable("inpy","orc5318");
+
+    pigDump("inpy");
+    pigDump("rc5318");
+    pigDump("orc5318");
+
+  }
+
+}
diff --git hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatEximLoader.java.broken hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatEximLoader.java.broken
new file mode 100644
index 0000000..238edb2
--- /dev/null
+++ hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatEximLoader.java.broken
@@ -0,0 +1,352 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ *
or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hcatalog.pig;
+
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Properties;
+import java.util.TreeMap;
+
+import junit.framework.TestCase;
+
+import org.apache.hcatalog.MiniCluster;
+import org.apache.pig.ExecType;
+import org.apache.pig.PigServer;
+import org.apache.pig.backend.executionengine.ExecException;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.impl.util.UDFContext;
+
+/**
+ *
+ * TestHCatEximLoader. Assumes Exim storer is working well
+ *
+ * Each test exports data with HCatEximStorer and then reads the export
+ * location back with HCatEximLoader.
+ *
+ * NOTE(review): this file is checked in with a ".java.broken" suffix and still
+ * uses the old org.apache.hcatalog package names; presumably it is parked
+ * until the Exim loader/storer are ported - confirm before re-enabling.
+ */
+public class TestHCatEximLoader extends TestCase {
+
+  private static final String NONPART_TABLE = "junit_unparted";
+  private static final String PARTITIONED_TABLE = "junit_parted";
+  private static MiniCluster cluster = MiniCluster.buildCluster();
+
+  private static final String dataLocation = "/tmp/data";
+  private static String fqdataLocation;
+  private static final String exportLocation = "/tmp/export";
+  private static String fqexportLocation;
+
+  private static Properties props;
+
+  /** Deletes the input data file and the export location from the mini cluster. */
+  private void cleanup() throws IOException {
+    MiniCluster.deleteFile(cluster, dataLocation);
+    MiniCluster.deleteFile(cluster, exportLocation);
+  }
+
+  /**
+   * Copies the cluster's fs.default.name into the Pig properties, derives the
+   * fully-qualified data/export locations from it and clears old test files.
+   */
+  @Override
+  protected void setUp() throws Exception {
+    props = new Properties();
+    props.setProperty("fs.default.name", cluster.getProperties().getProperty("fs.default.name"));
+    System.out.println("Filesystem class : " + cluster.getFileSystem().getClass().getName()
+      + ", fs.default.name : " + props.getProperty("fs.default.name"));
+    fqdataLocation = cluster.getProperties().getProperty("fs.default.name") + dataLocation;
+    fqexportLocation = cluster.getProperties().getProperty("fs.default.name") + exportLocation;
+    System.out.println("FQ Data Location :" + fqdataLocation);
+    System.out.println("FQ Export Location :" + fqexportLocation);
+    cleanup();
+  }
+
+  @Override
+  protected void tearDown() throws Exception {
+    cleanup();
+  }
+
+  /** (Re)creates the four-row comma-separated employee input file at dataLocation. */
+  private void populateDataFile() throws IOException {
+    MiniCluster.deleteFile(cluster, dataLocation);
+    String[] input = new String[] {
+      "237,Krishna,01/01/1990,M,IN,TN",
+      "238,Kalpana,01/01/2000,F,IN,KA",
+      "239,Satya,01/01/2001,M,US,TN",
+      "240,Kavya,01/01/2002,F,US,KA"
+    };
+    MiniCluster.createInputFile(cluster, dataLocation, input);
+  }
+
+  /** Expected non-key fields of one employee row; the id is the map key. */
+  private static class EmpDetail {
+    String name;
+    String dob;
+    String mf;
+    String country;
+    String state;
+  }
+
+  /**
+   * Checks a six-field tuple (id, name, dob, sex, country, state) against the
+   * expected entry for its id and removes that entry from eds, so that a
+   * caller can verify every expected row was seen exactly once.
+   */
+  private void assertEmpDetail(Tuple t, Map<Integer, EmpDetail> eds) throws ExecException {
+    assertNotNull(t);
+    assertEquals(6, t.size());
+
+    assertTrue(t.get(0).getClass() == Integer.class);
+    assertTrue(t.get(1).getClass() == String.class);
+    assertTrue(t.get(2).getClass() == String.class);
+    assertTrue(t.get(3).getClass() == String.class);
+    assertTrue(t.get(4).getClass() == String.class);
+    assertTrue(t.get(5).getClass() == String.class);
+
+    EmpDetail ed = eds.remove(t.get(0));
+    assertNotNull(ed);
+
+    assertEquals(ed.name, t.get(1));
+    assertEquals(ed.dob, t.get(2));
+    assertEquals(ed.mf, t.get(3));
+    assertEquals(ed.country, t.get(4));
+    assertEquals(ed.state, t.get(5));
+  }
+
+  /** Registers one expected employee row in empDetails under its id. */
+  private void addEmpDetail(Map<Integer, EmpDetail> empDetails, int id, String name,
+      String dob, String mf, String country, String state) {
+    EmpDetail ed = new EmpDetail();
+    ed.name = name;
+    ed.dob = dob;
+    ed.mf = mf;
+    ed.country = country;
+    ed.state = state;
+    empDetails.put(id, ed);
+  }
+
+
+
+  /** Checks a four-field tuple (id, name, dob, sex) for exact types and values. */
+  private void assertEmpDetail(Tuple t, Integer id, String name, String dob, String mf)
+    throws ExecException {
+    assertNotNull(t);
+    assertEquals(4, t.size());
+    assertTrue(t.get(0).getClass() == Integer.class);
+    assertTrue(t.get(1).getClass() == String.class);
+    assertTrue(t.get(2).getClass() == String.class);
+    assertTrue(t.get(3).getClass() == String.class);
+
+    assertEquals(id, t.get(0));
+    assertEquals(name, t.get(1));
+    assertEquals(dob, t.get(2));
+    assertEquals(mf, t.get(3));
+  }
+
+  /** Checks a two-field projected tuple (sex, name) for exact types and values. */
+  private void assertEmpDetail(Tuple t, String mf, String name)
+    throws ExecException {
+    assertNotNull(t);
+    assertEquals(2, t.size());
+    assertTrue(t.get(0).getClass() == String.class);
+    assertTrue(t.get(1).getClass() == String.class);
+
+    assertEquals(mf, t.get(0));
+    assertEquals(name, t.get(1));
+  }
+
+
+
+  /** Exports an unpartitioned table and expects the four rows back in input order. */
+  public void testLoadNonPartTable() throws Exception {
+    populateDataFile();
+    {
+      PigServer server = new PigServer(ExecType.LOCAL, props);
+      UDFContext.getUDFContext().setClientSystemProps();
+      server.setBatchOn();
+      server
+        .registerQuery("A = load '"
+          + fqdataLocation
+          + "' using PigStorage(',') as (emp_id:int, emp_name:chararray, emp_dob:chararray, emp_sex:chararray);");
+      server.registerQuery("store A into '" + NONPART_TABLE
+        + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation + "');");
+      server.executeBatch();
+    }
+    {
+      PigServer server = new PigServer(ExecType.LOCAL, props);
+      UDFContext.getUDFContext().setClientSystemProps();
+
+      server
+        .registerQuery("A = load '"
+          + fqexportLocation
+          + "' using org.apache.hcatalog.pig.HCatEximLoader();");
+      Iterator<Tuple> XIter = server.openIterator("A");
+      assertTrue(XIter.hasNext());
+      Tuple t = XIter.next();
+      assertEmpDetail(t, 237, "Krishna", "01/01/1990", "M");
+      assertTrue(XIter.hasNext());
+      t = XIter.next();
+      assertEmpDetail(t, 238, "Kalpana", "01/01/2000", "F");
+      assertTrue(XIter.hasNext());
+      t = XIter.next();
+      assertEmpDetail(t, 239, "Satya", "01/01/2001", "M");
+      assertTrue(XIter.hasNext());
+      t = XIter.next();
+      assertEmpDetail(t, 240, "Kavya", "01/01/2002", "F");
+      assertFalse(XIter.hasNext());
+    }
+  }
+
+  /** Same export as testLoadNonPartTable, read back through a (sex, name) projection. */
+  public void testLoadNonPartProjection() throws Exception {
+    populateDataFile();
+    {
+      PigServer server = new PigServer(ExecType.LOCAL, props);
+      UDFContext.getUDFContext().setClientSystemProps();
+      server.setBatchOn();
+      server
+        .registerQuery("A = load '"
+          + fqdataLocation
+          + "' using PigStorage(',') as (emp_id:int, emp_name:chararray, emp_dob:chararray, emp_sex:chararray);");
+      server.registerQuery("store A into '" + NONPART_TABLE
+        + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation + "');");
+      server.executeBatch();
+    }
+    {
+      PigServer server = new PigServer(ExecType.LOCAL, props);
+      UDFContext.getUDFContext().setClientSystemProps();
+
+      server
+        .registerQuery("A = load '"
+          + fqexportLocation
+          + "' using org.apache.hcatalog.pig.HCatEximLoader();");
+      server.registerQuery("B = foreach A generate emp_sex, emp_name;");
+
+      Iterator<Tuple> XIter = server.openIterator("B");
+      assertTrue(XIter.hasNext());
+      Tuple t = XIter.next();
+      assertEmpDetail(t, "M", "Krishna");
+      assertTrue(XIter.hasNext());
+      t = XIter.next();
+      assertEmpDetail(t, "F", "Kalpana");
+      assertTrue(XIter.hasNext());
+      t = XIter.next();
+      assertEmpDetail(t, "M", "Satya");
+      assertTrue(XIter.hasNext());
+      t = XIter.next();
+      assertEmpDetail(t, "F", "Kavya");
+      assertFalse(XIter.hasNext());
+    }
+  }
+
+
+  /**
+   * Exports four (country, state) partitions of one table and expects all four
+   * rows back, in any order, with the partition values as trailing columns.
+   */
+  public void testLoadMultiPartTable() throws Exception {
+    {
+      populateDataFile();
+      PigServer server = new PigServer(ExecType.LOCAL, props);
+      UDFContext.getUDFContext().setClientSystemProps();
+      server.setBatchOn();
+      server
+        .registerQuery("A = load '"
+          + fqdataLocation
+          + "' using PigStorage(',') as (emp_id:int, emp_name:chararray, emp_dob:chararray, emp_sex:chararray, emp_country:chararray, emp_state:chararray);"
+        );
+      server.registerQuery("INTN = FILTER A BY emp_country == 'IN' AND emp_state == 'TN';");
+      server.registerQuery("INKA = FILTER A BY emp_country == 'IN' AND emp_state == 'KA';");
+      server.registerQuery("USTN = FILTER A BY emp_country == 'US' AND emp_state == 'TN';");
+      server.registerQuery("USKA = FILTER A BY emp_country == 'US' AND emp_state == 'KA';");
+      server.registerQuery("store INTN into '" + PARTITIONED_TABLE
+        + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation
+        + "', 'emp_country=in,emp_state=tn');");
+      server.registerQuery("store INKA into '" + PARTITIONED_TABLE
+        + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation
+        + "', 'emp_country=in,emp_state=ka');");
+      server.registerQuery("store USTN into '" + PARTITIONED_TABLE
+        + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation
+        + "', 'emp_country=us,emp_state=tn');");
+      server.registerQuery("store USKA into '" + PARTITIONED_TABLE
+        + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation
+        + "', 'emp_country=us,emp_state=ka');");
+      server.executeBatch();
+    }
+    {
+      PigServer server = new PigServer(ExecType.LOCAL, props);
+      UDFContext.getUDFContext().setClientSystemProps();
+
+      server
+        .registerQuery("A = load '"
+          + fqexportLocation
+          + "' using org.apache.hcatalog.pig.HCatEximLoader() "
+          //+ "as (emp_id:int, emp_name:chararray, emp_dob:chararray, emp_sex:chararray, emp_country:chararray, emp_state:chararray);");
+          + ";");
+
+      Iterator<Tuple> XIter = server.openIterator("A");
+
+      Map<Integer, EmpDetail> empDetails = new TreeMap<Integer, EmpDetail>();
+      addEmpDetail(empDetails, 237, "Krishna", "01/01/1990", "M", "in", "tn");
+      addEmpDetail(empDetails, 238, "Kalpana", "01/01/2000", "F", "in", "ka");
+      addEmpDetail(empDetails, 239, "Satya", "01/01/2001", "M", "us", "tn");
+      addEmpDetail(empDetails, 240, "Kavya", "01/01/2002", "F", "us", "ka");
+
+      while(XIter.hasNext()) {
+        Tuple t = XIter.next();
+        // More tuples than expected rows is a failure. (Was assertNotSame on
+        // boxed Integers, which only worked through the Integer cache.)
+        assertFalse(empDetails.isEmpty());
+        assertEmpDetail(t, empDetails);
+      }
+      assertEquals(0, empDetails.size());
+    }
+  }
+
+  /**
+   * Same export as testLoadMultiPartTable, but filters on emp_state == 'ka'
+   * after loading and expects exactly the two matching rows.
+   */
+  public void testLoadMultiPartFilter() throws Exception {
+    {
+      populateDataFile();
+      PigServer server = new PigServer(ExecType.LOCAL, props);
+      UDFContext.getUDFContext().setClientSystemProps();
+      server.setBatchOn();
+      server
+        .registerQuery("A = load '"
+          + fqdataLocation
+          + "' using PigStorage(',') as (emp_id:int, emp_name:chararray, emp_dob:chararray, emp_sex:chararray, emp_country:chararray, emp_state:chararray);"
+        );
+      server.registerQuery("INTN = FILTER A BY emp_country == 'IN' AND emp_state == 'TN';");
+      server.registerQuery("INKA = FILTER A BY emp_country == 'IN' AND emp_state == 'KA';");
+      server.registerQuery("USTN = FILTER A BY emp_country == 'US' AND emp_state == 'TN';");
+      server.registerQuery("USKA = FILTER A BY emp_country == 'US' AND emp_state == 'KA';");
+      server.registerQuery("store INTN into '" + PARTITIONED_TABLE
+        + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation
+        + "', 'emp_country=in,emp_state=tn');");
+      server.registerQuery("store INKA into '" + PARTITIONED_TABLE
+        + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation
+        + "', 'emp_country=in,emp_state=ka');");
+      server.registerQuery("store USTN into '" + PARTITIONED_TABLE
+        + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation
+        + "', 'emp_country=us,emp_state=tn');");
+      server.registerQuery("store USKA into '" + PARTITIONED_TABLE
+        + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation
+        + "', 'emp_country=us,emp_state=ka');");
+      server.executeBatch();
+    }
+    {
+      PigServer server = new PigServer(ExecType.LOCAL, props);
+      UDFContext.getUDFContext().setClientSystemProps();
+
+      server
+        .registerQuery("A = load '"
+          + fqexportLocation
+          + "' using org.apache.hcatalog.pig.HCatEximLoader() "
+          + ";");
+      server.registerQuery("B = filter A by emp_state == 'ka';");
+
+      Iterator<Tuple> XIter = server.openIterator("B");
+
+      Map<Integer, EmpDetail> empDetails = new TreeMap<Integer, EmpDetail>();
+      addEmpDetail(empDetails, 238, "Kalpana", "01/01/2000", "F", "in", "ka");
+      addEmpDetail(empDetails, 240, "Kavya", "01/01/2002", "F", "us", "ka");
+
+      while(XIter.hasNext()) {
+        Tuple t = XIter.next();
+        // More tuples than expected rows is a failure. (Was assertNotSame on
+        // boxed Integers, which only worked through the Integer cache.)
+        assertFalse(empDetails.isEmpty());
+        assertEmpDetail(t, empDetails);
+      }
+      assertEquals(0, empDetails.size());
+    }
+  }
+
+
+}
diff --git hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatEximStorer.java.broken hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatEximStorer.java.broken
new file mode 100644
index 0000000..5424269
--- /dev/null
+++ hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatEximStorer.java.broken
@@ -0,0 +1,395 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hcatalog.pig; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import java.util.Set; +import java.util.TreeSet; + +import junit.framework.TestCase; + +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hcatalog.MiniCluster; +import org.apache.hcatalog.common.HCatConstants; +import org.apache.hcatalog.common.HCatUtil; +import org.apache.hcatalog.data.schema.HCatFieldSchema; +import org.apache.hcatalog.data.schema.HCatSchemaUtils; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.Partition; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.ql.parse.EximUtil; +import org.apache.hadoop.hive.serde.Constants; +import org.apache.pig.ExecType; +import org.apache.pig.PigServer; +import org.apache.pig.impl.logicalLayer.FrontendException; +import org.apache.pig.impl.util.UDFContext; + +public class TestHCatEximStorer extends TestCase { + + private static final String NONPART_TABLE = "junit_unparted"; + private static final String PARTITIONED_TABLE = "junit_parted"; + private static MiniCluster cluster = MiniCluster.buildCluster(); + + private static final String dataLocation = "/tmp/data"; + private static String fqdataLocation; + private static final String exportLocation = "/tmp/export"; + private static String fqexportLocation; + + private static Properties props; + + private void cleanup() throws IOException { + 
MiniCluster.deleteFile(cluster, dataLocation); + MiniCluster.deleteFile(cluster, exportLocation); + } + + @Override + protected void setUp() throws Exception { + props = new Properties(); + props.setProperty("fs.default.name", cluster.getProperties().getProperty("fs.default.name")); + System.out.println("Filesystem class : " + cluster.getFileSystem().getClass().getName() + ", fs.default.name : " + props.getProperty("fs.default.name")); + fqdataLocation = cluster.getProperties().getProperty("fs.default.name") + dataLocation; + fqexportLocation = cluster.getProperties().getProperty("fs.default.name") + exportLocation; + System.out.println("FQ Data Location :" + fqdataLocation); + System.out.println("FQ Export Location :" + fqexportLocation); + cleanup(); + } + + @Override + protected void tearDown() throws Exception { + cleanup(); + } + + private void populateDataFile() throws IOException { + MiniCluster.deleteFile(cluster, dataLocation); + String[] input = new String[] { + "237,Krishna,01/01/1990,M,IN,TN", + "238,Kalpana,01/01/2000,F,IN,KA", + "239,Satya,01/01/2001,M,US,TN", + "240,Kavya,01/01/2002,F,US,KA" + }; + MiniCluster.createInputFile(cluster, dataLocation, input); + } + + public void testStoreNonPartTable() throws Exception { + populateDataFile(); + PigServer server = new PigServer(ExecType.LOCAL, props); + UDFContext.getUDFContext().setClientSystemProps(); + server.setBatchOn(); + server.registerQuery("A = load '" + fqdataLocation + "' using PigStorage(',') as (emp_id:int, emp_name:chararray, emp_dob:chararray, emp_sex:chararray);"); + server.registerQuery("store A into '" + NONPART_TABLE + + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation + "');"); + server.executeBatch(); + + FileSystem fs = cluster.getFileSystem(); + + System.out.println("Filesystem class : " + cluster.getFileSystem().getClass().getName() + ", fs.default.name : " + props.getProperty("fs.default.name")); + + Map.Entry> metadata = EximUtil.readMetaData(fs, new 
Path(exportLocation, "_metadata")); + Table table = metadata.getKey(); + List partitions = metadata.getValue(); + + List columns = new ArrayList(); + columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_id", + Constants.INT_TYPE_NAME, ""))); + columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_name", + Constants.STRING_TYPE_NAME, ""))); + columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_dob", + Constants.STRING_TYPE_NAME, ""))); + columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_sex", + Constants.STRING_TYPE_NAME, ""))); + + + assertEquals("default", table.getDbName()); + assertEquals(NONPART_TABLE, table.getTableName()); + assertTrue(EximUtil.schemaCompare(table.getSd().getCols(), + HCatUtil.getFieldSchemaList(columns))); + assertEquals("org.apache.hcatalog.rcfile.RCFileInputDriver", + table.getParameters().get(HCatConstants.HCAT_ISD_CLASS)); + assertEquals("org.apache.hcatalog.rcfile.RCFileOutputDriver", + table.getParameters().get(HCatConstants.HCAT_OSD_CLASS)); + assertEquals("org.apache.hadoop.hive.ql.io.RCFileInputFormat", + table.getSd().getInputFormat()); + assertEquals("org.apache.hadoop.hive.ql.io.RCFileOutputFormat", + table.getSd().getOutputFormat()); + assertEquals("org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe", + table.getSd().getSerdeInfo().getSerializationLib()); + assertEquals(0, table.getPartitionKeys().size()); + + assertEquals(0, partitions.size()); + } + + public void testStorePartTable() throws Exception { + populateDataFile(); + PigServer server = new PigServer(ExecType.LOCAL, props); + UDFContext.getUDFContext().setClientSystemProps(); + server.setBatchOn(); + server.registerQuery("A = load '" + fqdataLocation + "' using PigStorage(',') as (emp_id:int, emp_name:chararray, emp_dob:chararray, emp_sex:chararray);"); + server.registerQuery("store A into '" + PARTITIONED_TABLE + + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation + "', 
'emp_country=in,emp_state=tn');"); + server.executeBatch(); + + FileSystem fs = cluster.getFileSystem(); + + System.out.println("Filesystem class : " + cluster.getFileSystem().getClass().getName() + ", fs.default.name : " + props.getProperty("fs.default.name")); + + Map.Entry> metadata = EximUtil.readMetaData(fs, new Path(exportLocation, "_metadata")); + Table table = metadata.getKey(); + List partitions = metadata.getValue(); + + List columns = new ArrayList(); + columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_id", + Constants.INT_TYPE_NAME, ""))); + columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_name", + Constants.STRING_TYPE_NAME, ""))); + columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_dob", + Constants.STRING_TYPE_NAME, ""))); + columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_sex", + Constants.STRING_TYPE_NAME, ""))); + + + assertEquals("default", table.getDbName()); + assertEquals(PARTITIONED_TABLE, table.getTableName()); + assertTrue(EximUtil.schemaCompare(table.getSd().getCols(), + HCatUtil.getFieldSchemaList(columns))); + assertEquals("org.apache.hcatalog.rcfile.RCFileInputDriver", + table.getParameters().get(HCatConstants.HCAT_ISD_CLASS)); + assertEquals("org.apache.hcatalog.rcfile.RCFileOutputDriver", + table.getParameters().get(HCatConstants.HCAT_OSD_CLASS)); + assertEquals("org.apache.hadoop.hive.ql.io.RCFileInputFormat", + table.getSd().getInputFormat()); + assertEquals("org.apache.hadoop.hive.ql.io.RCFileOutputFormat", + table.getSd().getOutputFormat()); + assertEquals("org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe", + table.getSd().getSerdeInfo().getSerializationLib()); + assertEquals(2, table.getPartitionKeys().size()); + List partSchema = table.getPartitionKeys(); + assertEquals("emp_country", partSchema.get(0).getName()); + assertEquals("emp_state", partSchema.get(1).getName()); + + assertEquals(1, partitions.size()); + Partition partition = partitions.get(0); 
+ assertEquals("in", partition.getValues().get(0)); + assertEquals("tn", partition.getValues().get(1)); + assertEquals("org.apache.hcatalog.rcfile.RCFileInputDriver", + partition.getParameters().get(HCatConstants.HCAT_ISD_CLASS)); + assertEquals("org.apache.hcatalog.rcfile.RCFileOutputDriver", + partition.getParameters().get(HCatConstants.HCAT_OSD_CLASS)); + } + + public void testStorePartTable_state_country() throws Exception { + populateDataFile(); + PigServer server = new PigServer(ExecType.LOCAL, props); + UDFContext.getUDFContext().setClientSystemProps(); + server.setBatchOn(); + server.registerQuery("A = load '" + fqdataLocation + "' using PigStorage(',') as (emp_id:int, emp_name:chararray, emp_dob:chararray, emp_sex:chararray);"); + server.registerQuery("store A into '" + PARTITIONED_TABLE + + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation + "', 'emp_state=tn,emp_country=in');"); + server.executeBatch(); + + FileSystem fs = cluster.getFileSystem(); + + System.out.println("Filesystem class : " + cluster.getFileSystem().getClass().getName() + ", fs.default.name : " + props.getProperty("fs.default.name")); + + Map.Entry> metadata = EximUtil.readMetaData(fs, new Path(exportLocation, "_metadata")); + Table table = metadata.getKey(); + List partitions = metadata.getValue(); + + List columns = new ArrayList(); + columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_id", + Constants.INT_TYPE_NAME, ""))); + columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_name", + Constants.STRING_TYPE_NAME, ""))); + columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_dob", + Constants.STRING_TYPE_NAME, ""))); + columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_sex", + Constants.STRING_TYPE_NAME, ""))); + + + assertEquals("default", table.getDbName()); + assertEquals(PARTITIONED_TABLE, table.getTableName()); + assertTrue(EximUtil.schemaCompare(table.getSd().getCols(), + 
HCatUtil.getFieldSchemaList(columns))); + assertEquals("org.apache.hcatalog.rcfile.RCFileInputDriver", + table.getParameters().get(HCatConstants.HCAT_ISD_CLASS)); + assertEquals("org.apache.hcatalog.rcfile.RCFileOutputDriver", + table.getParameters().get(HCatConstants.HCAT_OSD_CLASS)); + assertEquals("org.apache.hadoop.hive.ql.io.RCFileInputFormat", + table.getSd().getInputFormat()); + assertEquals("org.apache.hadoop.hive.ql.io.RCFileOutputFormat", + table.getSd().getOutputFormat()); + assertEquals("org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe", + table.getSd().getSerdeInfo().getSerializationLib()); + assertEquals(2, table.getPartitionKeys().size()); + List partSchema = table.getPartitionKeys(); + assertEquals("emp_state", partSchema.get(0).getName()); + assertEquals("emp_country", partSchema.get(1).getName()); + + assertEquals(1, partitions.size()); + Partition partition = partitions.get(0); + assertEquals("tn", partition.getValues().get(0)); + assertEquals("in", partition.getValues().get(1)); + assertEquals("org.apache.hcatalog.rcfile.RCFileInputDriver", + partition.getParameters().get(HCatConstants.HCAT_ISD_CLASS)); + assertEquals("org.apache.hcatalog.rcfile.RCFileOutputDriver", + partition.getParameters().get(HCatConstants.HCAT_OSD_CLASS)); + } + + public void testStoreNonPartCompatSchemaTable() throws Exception { + populateDataFile(); + PigServer server = new PigServer(ExecType.LOCAL, props); + UDFContext.getUDFContext().setClientSystemProps(); + server.setBatchOn(); + server.registerQuery("A = load '" + fqdataLocation + "' using PigStorage(',') as (emp_id:int, emp_name:chararray, emp_dob:chararray, emp_sex:chararray);"); + server.registerQuery("store A into '" + NONPART_TABLE + + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation + "', '', 'id:int, name:chararray, dob:chararray, sex:chararray');"); + server.executeBatch(); + + FileSystem fs = cluster.getFileSystem(); + + System.out.println("Filesystem class : " + 
cluster.getFileSystem().getClass().getName() + ", fs.default.name : " + props.getProperty("fs.default.name")); + + Map.Entry> metadata = EximUtil.readMetaData(fs, new Path(exportLocation, "_metadata")); + Table table = metadata.getKey(); + List partitions = metadata.getValue(); + + List columns = new ArrayList(); + columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("id", + Constants.INT_TYPE_NAME, ""))); + columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("name", + Constants.STRING_TYPE_NAME, ""))); + columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("dob", + Constants.STRING_TYPE_NAME, ""))); + columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("sex", + Constants.STRING_TYPE_NAME, ""))); + + + assertEquals("default", table.getDbName()); + assertEquals(NONPART_TABLE, table.getTableName()); + assertTrue(EximUtil.schemaCompare(table.getSd().getCols(), + HCatUtil.getFieldSchemaList(columns))); + assertEquals("org.apache.hcatalog.rcfile.RCFileInputDriver", + table.getParameters().get(HCatConstants.HCAT_ISD_CLASS)); + assertEquals("org.apache.hcatalog.rcfile.RCFileOutputDriver", + table.getParameters().get(HCatConstants.HCAT_OSD_CLASS)); + assertEquals("org.apache.hadoop.hive.ql.io.RCFileInputFormat", + table.getSd().getInputFormat()); + assertEquals("org.apache.hadoop.hive.ql.io.RCFileOutputFormat", + table.getSd().getOutputFormat()); + assertEquals("org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe", + table.getSd().getSerdeInfo().getSerializationLib()); + assertEquals(0, table.getPartitionKeys().size()); + + assertEquals(0, partitions.size()); + } + + public void testStoreNonPartNonCompatSchemaTable() throws Exception { + populateDataFile(); + PigServer server = new PigServer(ExecType.LOCAL, props); + UDFContext.getUDFContext().setClientSystemProps(); + server.setBatchOn(); + server.registerQuery("A = load '" + fqdataLocation + "' using PigStorage(',') as (emp_id:int, emp_name:chararray, emp_dob:chararray, 
emp_sex:chararray);"); + server.registerQuery("store A into '" + NONPART_TABLE + + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation + "', '', 'id:int, name:chararray, dob:chararray, sex:int');"); + try { + server.executeBatch(); + fail("Expected exception not thrown"); + } catch (FrontendException e) { + } + } + + public void testStoreMultiPartTable() throws Exception { + populateDataFile(); + PigServer server = new PigServer(ExecType.LOCAL, props); + UDFContext.getUDFContext().setClientSystemProps(); + server.setBatchOn(); + server.registerQuery("A = load '" + fqdataLocation + "' using PigStorage(',') as (emp_id:int, emp_name:chararray, emp_dob:chararray, emp_sex:chararray, emp_country:chararray, emp_state:chararray);"); + server.registerQuery("INTN = FILTER A BY emp_country == 'IN' AND emp_state == 'TN';"); + server.registerQuery("INKA = FILTER A BY emp_country == 'IN' AND emp_state == 'KA';"); + server.registerQuery("USTN = FILTER A BY emp_country == 'US' AND emp_state == 'TN';"); + server.registerQuery("USKA = FILTER A BY emp_country == 'US' AND emp_state == 'KA';"); + server.registerQuery("store INTN into '" + PARTITIONED_TABLE + + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation + "', 'emp_country=in,emp_state=tn');"); + server.registerQuery("store INKA into '" + PARTITIONED_TABLE + + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation + "', 'emp_country=in,emp_state=ka');"); + server.registerQuery("store USTN into '" + PARTITIONED_TABLE + + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation + "', 'emp_country=us,emp_state=tn');"); + server.registerQuery("store USKA into '" + PARTITIONED_TABLE + + "' using org.apache.hcatalog.pig.HCatEximStorer('" + fqexportLocation + "', 'emp_country=us,emp_state=ka');"); + server.executeBatch(); + + FileSystem fs = cluster.getFileSystem(); + + System.out.println("Filesystem class : " + cluster.getFileSystem().getClass().getName() + ", 
fs.default.name : " + props.getProperty("fs.default.name")); + + Map.Entry> metadata = EximUtil.readMetaData(fs, new Path(exportLocation, "_metadata")); + Table table = metadata.getKey(); + List partitions = metadata.getValue(); + + List columns = new ArrayList(); + columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_id", + Constants.INT_TYPE_NAME, ""))); + columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_name", + Constants.STRING_TYPE_NAME, ""))); + columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_dob", + Constants.STRING_TYPE_NAME, ""))); + columns.add(HCatSchemaUtils.getHCatFieldSchema(new FieldSchema("emp_sex", + Constants.STRING_TYPE_NAME, ""))); + + + assertEquals("default", table.getDbName()); + assertEquals(PARTITIONED_TABLE, table.getTableName()); + assertTrue(EximUtil.schemaCompare(table.getSd().getCols(), + HCatUtil.getFieldSchemaList(columns))); + assertEquals("org.apache.hcatalog.rcfile.RCFileInputDriver", + table.getParameters().get(HCatConstants.HCAT_ISD_CLASS)); + assertEquals("org.apache.hcatalog.rcfile.RCFileOutputDriver", + table.getParameters().get(HCatConstants.HCAT_OSD_CLASS)); + assertEquals("org.apache.hadoop.hive.ql.io.RCFileInputFormat", + table.getSd().getInputFormat()); + assertEquals("org.apache.hadoop.hive.ql.io.RCFileOutputFormat", + table.getSd().getOutputFormat()); + assertEquals("org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe", + table.getSd().getSerdeInfo().getSerializationLib()); + assertEquals(2, table.getPartitionKeys().size()); + List partSchema = table.getPartitionKeys(); + assertEquals("emp_country", partSchema.get(0).getName()); + assertEquals("emp_state", partSchema.get(1).getName()); + + assertEquals(4, partitions.size()); + Set parts = new TreeSet(); + parts.add("in,tn"); + parts.add("in,ka"); + parts.add("us,tn"); + parts.add("us,ka"); + + for (Partition partition : partitions) { + assertEquals("org.apache.hcatalog.rcfile.RCFileInputDriver", + 
partition.getParameters().get(HCatConstants.HCAT_ISD_CLASS)); + assertEquals("org.apache.hcatalog.rcfile.RCFileOutputDriver", + partition.getParameters().get(HCatConstants.HCAT_OSD_CLASS)); + assertTrue(parts.remove(partition.getValues().get(0) + "," + partition.getValues().get(1))); + } + assertEquals(0, parts.size()); + } +} diff --git hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoader.java hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoader.java new file mode 100644 index 0000000..4faba3e --- /dev/null +++ hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoader.java @@ -0,0 +1,451 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.hive.hcatalog.pig; + +import java.io.File; +import java.io.IOException; +import java.io.RandomAccessFile; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Properties; + +import junit.framework.TestCase; + +import org.apache.hadoop.fs.FileUtil; +import org.apache.hadoop.hive.cli.CliSessionState; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.CommandNeedRetryException; +import org.apache.hadoop.hive.ql.Driver; +import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hive.hcatalog.HcatTestUtils; +import org.apache.hive.hcatalog.common.HCatConstants; +import org.apache.hive.hcatalog.data.Pair; +import org.apache.pig.ExecType; +import org.apache.pig.PigServer; +import org.apache.pig.ResourceStatistics; +import org.apache.pig.data.DataType; +import org.apache.pig.data.Tuple; +import org.apache.pig.impl.logicalLayer.schema.Schema; +import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema; + +public class TestHCatLoader extends TestCase { + private static final String TEST_DATA_DIR = + "/tmp/build/test/data/" + TestHCatLoader.class.getCanonicalName(); + private static final String TEST_WAREHOUSE_DIR = TEST_DATA_DIR + "/warehouse"; + private static final String BASIC_FILE_NAME = TEST_DATA_DIR + "/basic.input.data"; + private static final String COMPLEX_FILE_NAME = TEST_DATA_DIR + "/complex.input.data"; + + private static final String BASIC_TABLE = "junit_unparted_basic"; + private static final String COMPLEX_TABLE = "junit_unparted_complex"; + private static final String PARTITIONED_TABLE = "junit_parted_basic"; + private static final String SPECIFIC_SIZE_TABLE = "junit_specific_size"; + private static Driver driver; + + private static int guardTestCount = 6; // ugh, instantiate using introspection in 
guardedSetupBeforeClass + private static boolean setupHasRun = false; + + + private static Map> basicInputData; + + protected String storageFormat() { + return "RCFILE tblproperties('hcat.isd'='org.apache.hive.hcatalog.rcfile.RCFileInputDriver'," + + "'hcat.osd'='org.apache.hive.hcatalog.rcfile.RCFileOutputDriver')"; + } + + private void dropTable(String tablename) throws IOException, CommandNeedRetryException { + driver.run("drop table " + tablename); + } + + private void createTable(String tablename, String schema, String partitionedBy) throws IOException, CommandNeedRetryException { + String createTable; + createTable = "create table " + tablename + "(" + schema + ") "; + if ((partitionedBy != null) && (!partitionedBy.trim().isEmpty())) { + createTable = createTable + "partitioned by (" + partitionedBy + ") "; + } + createTable = createTable + "stored as " +storageFormat(); + int retCode = driver.run(createTable).getResponseCode(); + if (retCode != 0) { + throw new IOException("Failed to create table. 
[" + createTable + "], return code from hive driver : [" + retCode + "]"); + } + } + + private void createTable(String tablename, String schema) throws IOException, CommandNeedRetryException { + createTable(tablename, schema, null); + } + + protected void guardedSetUpBeforeClass() throws Exception { + if (!setupHasRun) { + setupHasRun = true; + } else { + return; + } + + File f = new File(TEST_WAREHOUSE_DIR); + if (f.exists()) { + FileUtil.fullyDelete(f); + } + new File(TEST_WAREHOUSE_DIR).mkdirs(); + + HiveConf hiveConf = new HiveConf(this.getClass()); + hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); + hiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, TEST_WAREHOUSE_DIR); + driver = new Driver(hiveConf); + SessionState.start(new CliSessionState(hiveConf)); + + cleanup(); + + createTable(BASIC_TABLE, "a int, b string"); + createTable(COMPLEX_TABLE, + "name string, studentid int, " + + "contact struct, " + + "currently_registered_courses array, " + + "current_grades map, " + + "phnos array>"); + + createTable(PARTITIONED_TABLE, "a int, b string", "bkt string"); + createTable(SPECIFIC_SIZE_TABLE, "a int, b string"); + + int LOOP_SIZE = 3; + String[] input = new String[LOOP_SIZE * LOOP_SIZE]; + basicInputData = new HashMap>(); + int k = 0; + for (int i = 1; i <= LOOP_SIZE; i++) { + String si = i + ""; + for (int j = 1; j <= LOOP_SIZE; j++) { + String sj = "S" + j + "S"; + input[k] = si + "\t" + sj; + basicInputData.put(k, new Pair(i, sj)); + k++; + } + } + HcatTestUtils.createTestDataFile(BASIC_FILE_NAME, input); + HcatTestUtils.createTestDataFile(COMPLEX_FILE_NAME, + new String[]{ + //"Henry Jekyll\t42\t(415-253-6367,hjekyll@contemporary.edu.uk)\t{(PHARMACOLOGY),(PSYCHIATRY)},[PHARMACOLOGY#A-,PSYCHIATRY#B+],{(415-253-6367,cell),(408-253-6367,landline)}", + //"Edward 
Hyde\t1337\t(415-253-6367,anonymous@b44chan.org)\t{(CREATIVE_WRITING),(COPYRIGHT_LAW)},[CREATIVE_WRITING#A+,COPYRIGHT_LAW#D],{(415-253-6367,cell),(408-253-6367,landline)}", + } + ); + + PigServer server = new PigServer(ExecType.LOCAL); + server.setBatchOn(); + server.registerQuery("A = load '" + BASIC_FILE_NAME + "' as (a:int, b:chararray);"); + + server.registerQuery("store A into '" + BASIC_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer();"); + server.registerQuery("store A into '" + SPECIFIC_SIZE_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer();"); + server.registerQuery("B = foreach A generate a,b;"); + server.registerQuery("B2 = filter B by a < 2;"); + server.registerQuery("store B2 into '" + PARTITIONED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer('bkt=0');"); + + server.registerQuery("C = foreach A generate a,b;"); + server.registerQuery("C2 = filter C by a >= 2;"); + server.registerQuery("store C2 into '" + PARTITIONED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer('bkt=1');"); + + server.registerQuery("D = load '" + COMPLEX_FILE_NAME + "' as (name:chararray, studentid:int, contact:tuple(phno:chararray,email:chararray), currently_registered_courses:bag{innertup:tuple(course:chararray)}, current_grades:map[ ] , phnos :bag{innertup:tuple(phno:chararray,type:chararray)});"); + server.registerQuery("store D into '" + COMPLEX_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer();"); + server.executeBatch(); + + } + + private void cleanup() throws IOException, CommandNeedRetryException { + dropTable(BASIC_TABLE); + dropTable(COMPLEX_TABLE); + dropTable(PARTITIONED_TABLE); + dropTable(SPECIFIC_SIZE_TABLE); + } + + protected void guardedTearDownAfterClass() throws Exception { + guardTestCount--; + if (guardTestCount > 0) { + return; + } + cleanup(); + } + + @Override + protected void setUp() throws Exception { + guardedSetUpBeforeClass(); + } + + @Override + protected void tearDown() throws Exception { + 
guardedTearDownAfterClass(); + } + + public void testSchemaLoadBasic() throws IOException { + + PigServer server = new PigServer(ExecType.LOCAL); + + // test that schema was loaded correctly + server.registerQuery("X = load '" + BASIC_TABLE + "' using org.apache.hive.hcatalog.pig.HCatLoader();"); + Schema dumpedXSchema = server.dumpSchema("X"); + List Xfields = dumpedXSchema.getFields(); + assertEquals(2, Xfields.size()); + assertTrue(Xfields.get(0).alias.equalsIgnoreCase("a")); + assertTrue(Xfields.get(0).type == DataType.INTEGER); + assertTrue(Xfields.get(1).alias.equalsIgnoreCase("b")); + assertTrue(Xfields.get(1).type == DataType.CHARARRAY); + + } + + public void testReadDataBasic() throws IOException { + PigServer server = new PigServer(ExecType.LOCAL); + + server.registerQuery("X = load '" + BASIC_TABLE + "' using org.apache.hive.hcatalog.pig.HCatLoader();"); + Iterator XIter = server.openIterator("X"); + int numTuplesRead = 0; + while (XIter.hasNext()) { + Tuple t = XIter.next(); + assertEquals(2, t.size()); + assertTrue(t.get(0).getClass() == Integer.class); + assertTrue(t.get(1).getClass() == String.class); + assertEquals(t.get(0), basicInputData.get(numTuplesRead).first); + assertEquals(t.get(1), basicInputData.get(numTuplesRead).second); + numTuplesRead++; + } + assertEquals(basicInputData.size(), numTuplesRead); + } + + public void testSchemaLoadComplex() throws IOException { + + PigServer server = new PigServer(ExecType.LOCAL); + + // test that schema was loaded correctly + server.registerQuery("K = load '" + COMPLEX_TABLE + "' using org.apache.hive.hcatalog.pig.HCatLoader();"); + Schema dumpedKSchema = server.dumpSchema("K"); + List Kfields = dumpedKSchema.getFields(); + assertEquals(6, Kfields.size()); + + assertEquals(DataType.CHARARRAY, Kfields.get(0).type); + assertEquals("name", Kfields.get(0).alias.toLowerCase()); + + assertEquals(DataType.INTEGER, Kfields.get(1).type); + assertEquals("studentid", Kfields.get(1).alias.toLowerCase()); + + 
assertEquals(DataType.TUPLE, Kfields.get(2).type); + assertEquals("contact", Kfields.get(2).alias.toLowerCase()); + { + assertNotNull(Kfields.get(2).schema); + assertTrue(Kfields.get(2).schema.getFields().size() == 2); + assertTrue(Kfields.get(2).schema.getFields().get(0).type == DataType.CHARARRAY); + assertTrue(Kfields.get(2).schema.getFields().get(0).alias.equalsIgnoreCase("phno")); + assertTrue(Kfields.get(2).schema.getFields().get(1).type == DataType.CHARARRAY); + assertTrue(Kfields.get(2).schema.getFields().get(1).alias.equalsIgnoreCase("email")); + } + assertEquals(DataType.BAG, Kfields.get(3).type); + assertEquals("currently_registered_courses", Kfields.get(3).alias.toLowerCase()); + { + assertNotNull(Kfields.get(3).schema); + assertEquals(1, Kfields.get(3).schema.getFields().size()); + assertEquals(DataType.TUPLE, Kfields.get(3).schema.getFields().get(0).type); + assertNotNull(Kfields.get(3).schema.getFields().get(0).schema); + assertEquals(1, Kfields.get(3).schema.getFields().get(0).schema.getFields().size()); + assertEquals(DataType.CHARARRAY, Kfields.get(3).schema.getFields().get(0).schema.getFields().get(0).type); + // assertEquals("course",Kfields.get(3).schema.getFields().get(0).schema.getFields().get(0).alias.toLowerCase()); + // commented out, because the name becomes "innerfield" by default - we call it "course" in pig, + // but in the metadata, it'd be anonymous, so this would be autogenerated, which is fine + } + assertEquals(DataType.MAP, Kfields.get(4).type); + assertEquals("current_grades", Kfields.get(4).alias.toLowerCase()); + assertEquals(DataType.BAG, Kfields.get(5).type); + assertEquals("phnos", Kfields.get(5).alias.toLowerCase()); + { + assertNotNull(Kfields.get(5).schema); + assertEquals(1, Kfields.get(5).schema.getFields().size()); + assertEquals(DataType.TUPLE, Kfields.get(5).schema.getFields().get(0).type); + assertNotNull(Kfields.get(5).schema.getFields().get(0).schema); + 
assertTrue(Kfields.get(5).schema.getFields().get(0).schema.getFields().size() == 2); + assertEquals(DataType.CHARARRAY, Kfields.get(5).schema.getFields().get(0).schema.getFields().get(0).type); + assertEquals("phno", Kfields.get(5).schema.getFields().get(0).schema.getFields().get(0).alias.toLowerCase()); + assertEquals(DataType.CHARARRAY, Kfields.get(5).schema.getFields().get(0).schema.getFields().get(1).type); + assertEquals("type", Kfields.get(5).schema.getFields().get(0).schema.getFields().get(1).alias.toLowerCase()); + } + + } + + public void testReadPartitionedBasic() throws IOException, CommandNeedRetryException { + PigServer server = new PigServer(ExecType.LOCAL); + + driver.run("select * from " + PARTITIONED_TABLE); + ArrayList valuesReadFromHiveDriver = new ArrayList(); + driver.getResults(valuesReadFromHiveDriver); + assertEquals(basicInputData.size(), valuesReadFromHiveDriver.size()); + + server.registerQuery("W = load '" + PARTITIONED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatLoader();"); + Schema dumpedWSchema = server.dumpSchema("W"); + List Wfields = dumpedWSchema.getFields(); + assertEquals(3, Wfields.size()); + assertTrue(Wfields.get(0).alias.equalsIgnoreCase("a")); + assertTrue(Wfields.get(0).type == DataType.INTEGER); + assertTrue(Wfields.get(1).alias.equalsIgnoreCase("b")); + assertTrue(Wfields.get(1).type == DataType.CHARARRAY); + assertTrue(Wfields.get(2).alias.equalsIgnoreCase("bkt")); + assertTrue(Wfields.get(2).type == DataType.CHARARRAY); + + Iterator WIter = server.openIterator("W"); + Collection> valuesRead = new ArrayList>(); + while (WIter.hasNext()) { + Tuple t = WIter.next(); + assertTrue(t.size() == 3); + assertTrue(t.get(0).getClass() == Integer.class); + assertTrue(t.get(1).getClass() == String.class); + assertTrue(t.get(2).getClass() == String.class); + valuesRead.add(new Pair((Integer) t.get(0), (String) t.get(1))); + if ((Integer) t.get(0) < 2) { + assertEquals("0", t.get(2)); + } else { + assertEquals("1", t.get(2)); + 
} + } + assertEquals(valuesReadFromHiveDriver.size(), valuesRead.size()); + + server.registerQuery("P1 = load '" + PARTITIONED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatLoader();"); + server.registerQuery("P1filter = filter P1 by bkt == '0';"); + Iterator P1Iter = server.openIterator("P1filter"); + int count1 = 0; + while (P1Iter.hasNext()) { + Tuple t = P1Iter.next(); + + assertEquals("0", t.get(2)); + assertEquals(1, t.get(0)); + count1++; + } + assertEquals(3, count1); + + server.registerQuery("P2 = load '" + PARTITIONED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatLoader();"); + server.registerQuery("P2filter = filter P2 by bkt == '1';"); + Iterator P2Iter = server.openIterator("P2filter"); + int count2 = 0; + while (P2Iter.hasNext()) { + Tuple t = P2Iter.next(); + + assertEquals("1", t.get(2)); + assertTrue(((Integer) t.get(0)) > 1); + count2++; + } + assertEquals(6, count2); + } + + public void testProjectionsBasic() throws IOException { + + PigServer server = new PigServer(ExecType.LOCAL); + + // projections are handled by using generate, not "as" on the Load + + server.registerQuery("Y1 = load '" + BASIC_TABLE + "' using org.apache.hive.hcatalog.pig.HCatLoader();"); + server.registerQuery("Y2 = foreach Y1 generate a;"); + server.registerQuery("Y3 = foreach Y1 generate b,a;"); + Schema dumpedY2Schema = server.dumpSchema("Y2"); + Schema dumpedY3Schema = server.dumpSchema("Y3"); + List Y2fields = dumpedY2Schema.getFields(); + List Y3fields = dumpedY3Schema.getFields(); + assertEquals(1, Y2fields.size()); + assertEquals("a", Y2fields.get(0).alias.toLowerCase()); + assertEquals(DataType.INTEGER, Y2fields.get(0).type); + assertEquals(2, Y3fields.size()); + assertEquals("b", Y3fields.get(0).alias.toLowerCase()); + assertEquals(DataType.CHARARRAY, Y3fields.get(0).type); + assertEquals("a", Y3fields.get(1).alias.toLowerCase()); + assertEquals(DataType.INTEGER, Y3fields.get(1).type); + + int numTuplesRead = 0; + Iterator Y2Iter = 
server.openIterator("Y2"); + while (Y2Iter.hasNext()) { + Tuple t = Y2Iter.next(); + assertEquals(t.size(), 1); + assertTrue(t.get(0).getClass() == Integer.class); + assertEquals(t.get(0), basicInputData.get(numTuplesRead).first); + numTuplesRead++; + } + numTuplesRead = 0; + Iterator Y3Iter = server.openIterator("Y3"); + while (Y3Iter.hasNext()) { + Tuple t = Y3Iter.next(); + assertEquals(t.size(), 2); + assertTrue(t.get(0).getClass() == String.class); + assertEquals(t.get(0), basicInputData.get(numTuplesRead).second); + assertTrue(t.get(1).getClass() == Integer.class); + assertEquals(t.get(1), basicInputData.get(numTuplesRead).first); + numTuplesRead++; + } + assertEquals(basicInputData.size(), numTuplesRead); + } + + public void testGetInputBytes() throws Exception { + File file = new File(TEST_WAREHOUSE_DIR + "/" + SPECIFIC_SIZE_TABLE + "/part-m-00000"); + file.deleteOnExit(); + RandomAccessFile randomAccessFile = new RandomAccessFile(file, "rw"); + randomAccessFile.setLength(2L * 1024 * 1024 * 1024); + + Job job = new Job(); + HCatLoader hCatLoader = new HCatLoader(); + hCatLoader.setUDFContextSignature(this.getName()); + hCatLoader.setLocation(SPECIFIC_SIZE_TABLE, job); + ResourceStatistics statistics = hCatLoader.getStatistics(file.getAbsolutePath(), job); + assertEquals(2048, (long) statistics.getmBytes()); + } + + public void testConvertBooleanToInt() throws Exception { + String tbl = "test_convert_boolean_to_int"; + String inputFileName = TEST_DATA_DIR + "/testConvertBooleanToInt/data.txt"; + File inputDataDir = new File(inputFileName).getParentFile(); + inputDataDir.mkdir(); + + String[] lines = new String[]{"llama\ttrue", "alpaca\tfalse"}; + HcatTestUtils.createTestDataFile(inputFileName, lines); + + assertEquals(0, driver.run("drop table if exists " + tbl).getResponseCode()); + assertEquals(0, driver.run("create external table " + tbl + + " (a string, b boolean) row format delimited fields terminated by '\t'" + + " stored as textfile location 'file://" 
+ + inputDataDir.getPath().replaceAll("\\\\", "/") + "'").getResponseCode()); + + Properties properties = new Properties(); + properties.setProperty(HCatConstants.HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER, "true"); + PigServer server = new PigServer(ExecType.LOCAL, properties); + server.registerQuery( + "data = load 'test_convert_boolean_to_int' using org.apache.hive.hcatalog.pig.HCatLoader();"); + Schema schema = server.dumpSchema("data"); + assertEquals(2, schema.getFields().size()); + + assertEquals("a", schema.getField(0).alias); + assertEquals(DataType.CHARARRAY, schema.getField(0).type); + assertEquals("b", schema.getField(1).alias); + if (PigHCatUtil.pigHasBooleanSupport()){ + assertEquals(DataType.BOOLEAN, schema.getField(1).type); + } else { + assertEquals(DataType.INTEGER, schema.getField(1).type); + } + + Iterator iterator = server.openIterator("data"); + Tuple t = iterator.next(); + assertEquals("llama", t.get(0)); + assertEquals(1, t.get(1)); + t = iterator.next(); + assertEquals("alpaca", t.get(0)); + assertEquals(0, t.get(1)); + assertFalse(iterator.hasNext()); + } +} diff --git hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderComplexSchema.java hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderComplexSchema.java new file mode 100644 index 0000000..0f7182f --- /dev/null +++ hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderComplexSchema.java @@ -0,0 +1,304 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.pig; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; + +import junit.framework.Assert; + +import org.apache.hadoop.hive.cli.CliSessionState; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.CommandNeedRetryException; +import org.apache.hadoop.hive.ql.Driver; +import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; +import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.pig.ExecType; +import org.apache.pig.PigServer; +import org.apache.pig.backend.executionengine.ExecException; +import org.apache.pig.backend.executionengine.ExecJob; +import org.apache.pig.data.BagFactory; +import org.apache.pig.data.DataBag; +import org.apache.pig.data.Tuple; +import org.apache.pig.data.TupleFactory; +import org.apache.pig.impl.logicalLayer.FrontendException; +import org.apache.pig.impl.logicalLayer.schema.Schema; +import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema; +import org.junit.BeforeClass; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class TestHCatLoaderComplexSchema { + + //private static MiniCluster cluster = MiniCluster.buildCluster(); + private static Driver driver; + //private static Properties props; + private static final Logger LOG = LoggerFactory.getLogger(TestHCatLoaderComplexSchema.class); + + private void dropTable(String tablename) throws IOException, 
CommandNeedRetryException { + driver.run("drop table " + tablename); + } + + protected String storageFormat() { + return "RCFILE tblproperties('hcat.isd'='org.apache.hive.hcatalog.rcfile.RCFileInputDriver'," + + "'hcat.osd'='org.apache.hive.hcatalog.rcfile.RCFileOutputDriver')"; + } + + private void createTable(String tablename, String schema, String partitionedBy) throws IOException, CommandNeedRetryException { + String createTable; + createTable = "create table " + tablename + "(" + schema + ") "; + if ((partitionedBy != null) && (!partitionedBy.trim().isEmpty())) { + createTable = createTable + "partitioned by (" + partitionedBy + ") "; + } + createTable = createTable + "stored as " + storageFormat(); + LOG.info("Creating table:\n {}", createTable); + CommandProcessorResponse result = driver.run(createTable); + int retCode = result.getResponseCode(); + if (retCode != 0) { + throw new IOException("Failed to create table. [" + createTable + "], return code from hive driver : [" + retCode + " " + result.getErrorMessage() + "]"); + } + } + + private void createTable(String tablename, String schema) throws IOException, CommandNeedRetryException { + createTable(tablename, schema, null); + } + + @BeforeClass + public static void setUpBeforeClass() throws Exception { + + HiveConf hiveConf = new HiveConf(TestHCatLoaderComplexSchema.class); + hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); + driver = new Driver(hiveConf); + SessionState.start(new CliSessionState(hiveConf)); + //props = new Properties(); + //props.setProperty("fs.default.name", cluster.getProperties().getProperty("fs.default.name")); + + } + + private static final TupleFactory tf = TupleFactory.getInstance(); + private static final BagFactory bf = BagFactory.getInstance(); + + private Tuple t(Object... 
objects) { + return tf.newTuple(Arrays.asList(objects)); + } + + private DataBag b(Tuple... objects) { + return bf.newDefaultBag(Arrays.asList(objects)); + } + + /** + * artificially complex nested schema to test nested schema conversion + * @throws Exception + */ + @Test + public void testSyntheticComplexSchema() throws Exception { + String pigSchema = + "a: " + + "(" + + "aa: chararray, " + + "ab: long, " + + "ac: map[], " + + "ad: { t: (ada: long) }, " + + "ae: { t: (aea:long, aeb: ( aeba: chararray, aebb: long)) }," + + "af: (afa: chararray, afb: long) " + + ")," + + "b: chararray, " + + "c: long, " + + "d: { t: (da:long, db: ( dba: chararray, dbb: long), dc: { t: (dca: long) } ) } "; + + // with extra structs + String tableSchema = + "a struct<" + + "aa: string, " + + "ab: bigint, " + + "ac: map, " + + "ad: array>, " + + "ae: array>>," + + "af: struct " + + ">, " + + "b string, " + + "c bigint, " + + "d array, dc: array>>>"; + + // without extra structs + String tableSchema2 = + "a struct<" + + "aa: string, " + + "ab: bigint, " + + "ac: map, " + + "ad: array, " + + "ae: array>>," + + "af: struct " + + ">, " + + "b string, " + + "c bigint, " + + "d array, dc: array>>"; + + List data = new ArrayList(); + for (int i = 0; i < 10; i++) { + Tuple t = t( + t( + "aa test", + 2l, + new HashMap() { + { + put("ac test1", "test 1"); + put("ac test2", "test 2"); + } + }, + b(t(3l), t(4l)), + b(t(5l, t("aeba test", 6l))), + t("afa test", 7l) + ), + "b test", + (long) i, + b(t(8l, t("dba test", 9l), b(t(10l))))); + + data.add(t); + } + verifyWriteRead("testSyntheticComplexSchema", pigSchema, tableSchema, data, true); + verifyWriteRead("testSyntheticComplexSchema", pigSchema, tableSchema, data, false); + verifyWriteRead("testSyntheticComplexSchema2", pigSchema, tableSchema2, data, true); + verifyWriteRead("testSyntheticComplexSchema2", pigSchema, tableSchema2, data, false); + + } + + private void verifyWriteRead(String tablename, String pigSchema, String tableSchema, List 
data, boolean provideSchemaToStorer) + throws IOException, CommandNeedRetryException, ExecException, FrontendException { + MockLoader.setData(tablename + "Input", data); + try { + createTable(tablename, tableSchema); + PigServer server = new PigServer(ExecType.LOCAL); + server.setBatchOn(); + server.registerQuery("A = load '" + tablename + "Input' using org.apache.hive.hcatalog.pig.MockLoader() AS (" + pigSchema + ");"); + Schema dumpedASchema = server.dumpSchema("A"); + server.registerQuery("STORE A into '" + tablename + "' using org.apache.hive.hcatalog.pig.HCatStorer(" + + (provideSchemaToStorer ? "'', '" + pigSchema + "'" : "") + + ");"); + + ExecJob execJob = server.executeBatch().get(0); + if (!execJob.getStatistics().isSuccessful()) { + throw new RuntimeException("Import failed", execJob.getException()); + } + // test that schema was loaded correctly + server.registerQuery("X = load '" + tablename + "' using org.apache.hive.hcatalog.pig.HCatLoader();"); + server.dumpSchema("X"); + Iterator it = server.openIterator("X"); + int i = 0; + while (it.hasNext()) { + Tuple input = data.get(i++); + Tuple output = it.next(); + Assert.assertEquals(input.toString(), output.toString()); + LOG.info("tuple : {} ", output); + } + Schema dumpedXSchema = server.dumpSchema("X"); + + Assert.assertEquals( + "expected " + dumpedASchema + " but was " + dumpedXSchema + " (ignoring field names)", + "", + compareIgnoreFiledNames(dumpedASchema, dumpedXSchema)); + + } finally { + dropTable(tablename); + } + } + + private String compareIgnoreFiledNames(Schema expected, Schema got) throws FrontendException { + if (expected == null || got == null) { + if (expected == got) { + return ""; + } else { + return "\nexpected " + expected + " got " + got; + } + } + if (expected.size() != got.size()) { + return "\nsize expected " + expected.size() + " (" + expected + ") got " + got.size() + " (" + got + ")"; + } + String message = ""; + for (int i = 0; i < expected.size(); i++) { + FieldSchema 
expectedField = expected.getField(i); + FieldSchema gotField = got.getField(i); + if (expectedField.type != gotField.type) { + message += "\ntype expected " + expectedField.type + " (" + expectedField + ") got " + gotField.type + " (" + gotField + ")"; + } else { + message += compareIgnoreFiledNames(expectedField.schema, gotField.schema); + } + } + return message; + } + + /** + * tests that unnecessary tuples are drop while converting schema + * (Pig requires Tuples in Bags) + * @throws Exception + */ + @Test + public void testTupleInBagInTupleInBag() throws Exception { + String pigSchema = "a: { b : ( c: { d: (i : long) } ) }"; + + String tableSchema = "a array< array< bigint > >"; + + List data = new ArrayList(); + data.add(t(b(t(b(t(100l), t(101l))), t(b(t(110l)))))); + data.add(t(b(t(b(t(200l))), t(b(t(210l))), t(b(t(220l)))))); + data.add(t(b(t(b(t(300l), t(301l)))))); + data.add(t(b(t(b(t(400l))), t(b(t(410l), t(411l), t(412l)))))); + + + verifyWriteRead("TupleInBagInTupleInBag1", pigSchema, tableSchema, data, true); + verifyWriteRead("TupleInBagInTupleInBag2", pigSchema, tableSchema, data, false); + + // test that we don't drop the unnecessary tuple if the table has the corresponding Struct + String tableSchema2 = "a array< struct< c: array< struct< i: bigint > > > >"; + + verifyWriteRead("TupleInBagInTupleInBag3", pigSchema, tableSchema2, data, true); + verifyWriteRead("TupleInBagInTupleInBag4", pigSchema, tableSchema2, data, false); + + } + + @Test + public void testMapWithComplexData() throws Exception { + String pigSchema = "a: long, b: map[]"; + String tableSchema = "a bigint, b map>"; + + List data = new ArrayList(); + for (int i = 0; i < 10; i++) { + Tuple t = t( + (long) i, + new HashMap() { + { + put("b test 1", t(1l, "test 1")); + put("b test 2", t(2l, "test 2")); + } + }); + + data.add(t); + } + verifyWriteRead("testMapWithComplexData", pigSchema, tableSchema, data, true); + verifyWriteRead("testMapWithComplexData2", pigSchema, tableSchema, data, 
false); + + } +} diff --git hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderStorer.java hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderStorer.java new file mode 100644 index 0000000..43be7ab --- /dev/null +++ hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderStorer.java @@ -0,0 +1,141 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.pig; + +import org.apache.hadoop.fs.FileUtil; +import org.apache.hive.hcatalog.HcatTestUtils; +import org.apache.hive.hcatalog.mapreduce.HCatBaseTest; +import org.apache.pig.ExecType; +import org.apache.pig.PigServer; +import org.apache.pig.backend.executionengine.ExecJob; +import org.apache.pig.data.DataType; +import org.apache.pig.data.Tuple; +import org.apache.pig.impl.logicalLayer.schema.Schema; +import org.junit.Assert; +import org.junit.Test; + +import java.io.File; +import java.util.Iterator; +import java.util.List; + +/** + * Test that require both HCatLoader and HCatStorer. For read or write only functionality, + * please consider @{link TestHCatLoader} or @{link TestHCatStorer}. 
+ */ +public class TestHCatLoaderStorer extends HCatBaseTest { + + /** + * Ensure Pig can read/write tinyint/smallint columns. + */ + @Test + public void testSmallTinyInt() throws Exception { + + String readTblName = "test_small_tiny_int"; + File dataDir = new File(TEST_DATA_DIR + "/testSmallTinyIntData"); + File dataFile = new File(dataDir, "testSmallTinyInt.tsv"); + + String writeTblName = "test_small_tiny_int_write"; + File writeDataFile = new File(TEST_DATA_DIR, writeTblName + ".tsv"); + + FileUtil.fullyDelete(dataDir); // Might not exist + Assert.assertTrue(dataDir.mkdir()); + + HcatTestUtils.createTestDataFile(dataFile.getAbsolutePath(), new String[]{ + String.format("%d\t%d", Short.MIN_VALUE, Byte.MIN_VALUE), + String.format("%d\t%d", Short.MAX_VALUE, Byte.MAX_VALUE) + }); + + // Create a table with smallint/tinyint columns, load data, and query from Hive. + Assert.assertEquals(0, driver.run("drop table if exists " + readTblName).getResponseCode()); + Assert.assertEquals(0, driver.run("create external table " + readTblName + + " (my_small_int smallint, my_tiny_int tinyint)" + + " row format delimited fields terminated by '\t' stored as textfile").getResponseCode()); + Assert.assertEquals(0, driver.run("load data local inpath '" + + dataDir.getPath().replaceAll("\\\\", "/") + "' into table " + readTblName).getResponseCode()); + + PigServer server = new PigServer(ExecType.LOCAL); + server.registerQuery( + "data = load '" + readTblName + "' using org.apache.hive.hcatalog.pig.HCatLoader();"); + + // Ensure Pig schema is correct. + Schema schema = server.dumpSchema("data"); + Assert.assertEquals(2, schema.getFields().size()); + Assert.assertEquals("my_small_int", schema.getField(0).alias); + Assert.assertEquals(DataType.INTEGER, schema.getField(0).type); + Assert.assertEquals("my_tiny_int", schema.getField(1).alias); + Assert.assertEquals(DataType.INTEGER, schema.getField(1).type); + + // Ensure Pig can read data correctly. 
+ Iterator it = server.openIterator("data"); + Tuple t = it.next(); + Assert.assertEquals(new Integer(Short.MIN_VALUE), t.get(0)); + Assert.assertEquals(new Integer(Byte.MIN_VALUE), t.get(1)); + t = it.next(); + Assert.assertEquals(new Integer(Short.MAX_VALUE), t.get(0)); + Assert.assertEquals(new Integer(Byte.MAX_VALUE), t.get(1)); + Assert.assertFalse(it.hasNext()); + + // Ensure Pig can write correctly to smallint/tinyint columns. This means values within the + // bounds of the column type are written, and values outside throw an exception. + Assert.assertEquals(0, driver.run("drop table if exists " + writeTblName).getResponseCode()); + Assert.assertEquals(0, driver.run("create table " + writeTblName + + " (my_small_int smallint, my_tiny_int tinyint) stored as rcfile").getResponseCode()); + + // Values within the column type bounds. + HcatTestUtils.createTestDataFile(writeDataFile.getAbsolutePath(), new String[]{ + String.format("%d\t%d", Short.MIN_VALUE, Byte.MIN_VALUE), + String.format("%d\t%d", Short.MAX_VALUE, Byte.MAX_VALUE) + }); + smallTinyIntBoundsCheckHelper(writeDataFile.getPath().replaceAll("\\\\", "/"), ExecJob.JOB_STATUS.COMPLETED); + + // Values outside the column type bounds will fail at runtime. 
+ HcatTestUtils.createTestDataFile(TEST_DATA_DIR + "/shortTooSmall.tsv", new String[]{ + String.format("%d\t%d", Short.MIN_VALUE - 1, 0)}); + smallTinyIntBoundsCheckHelper(TEST_DATA_DIR + "/shortTooSmall.tsv", ExecJob.JOB_STATUS.FAILED); + + HcatTestUtils.createTestDataFile(TEST_DATA_DIR + "/shortTooBig.tsv", new String[]{ + String.format("%d\t%d", Short.MAX_VALUE + 1, 0)}); + smallTinyIntBoundsCheckHelper(TEST_DATA_DIR + "/shortTooBig.tsv", ExecJob.JOB_STATUS.FAILED); + + HcatTestUtils.createTestDataFile(TEST_DATA_DIR + "/byteTooSmall.tsv", new String[]{ + String.format("%d\t%d", 0, Byte.MIN_VALUE - 1)}); + smallTinyIntBoundsCheckHelper(TEST_DATA_DIR + "/byteTooSmall.tsv", ExecJob.JOB_STATUS.FAILED); + + HcatTestUtils.createTestDataFile(TEST_DATA_DIR + "/byteTooBig.tsv", new String[]{ + String.format("%d\t%d", 0, Byte.MAX_VALUE + 1)}); + smallTinyIntBoundsCheckHelper(TEST_DATA_DIR + "/byteTooBig.tsv", ExecJob.JOB_STATUS.FAILED); + } + + private void smallTinyIntBoundsCheckHelper(String data, ExecJob.JOB_STATUS expectedStatus) + throws Exception { + Assert.assertEquals(0, driver.run("drop table if exists test_tbl").getResponseCode()); + Assert.assertEquals(0, driver.run("create table test_tbl" + + " (my_small_int smallint, my_tiny_int tinyint) stored as rcfile").getResponseCode()); + + PigServer server = new PigServer(ExecType.LOCAL); + server.setBatchOn(); + server.registerQuery("data = load '" + data + + "' using PigStorage('\t') as (my_small_int:int, my_tiny_int:int);"); + server.registerQuery( + "store data into 'test_tbl' using org.apache.hive.hcatalog.pig.HCatStorer();"); + List jobs = server.executeBatch(); + Assert.assertEquals(expectedStatus, jobs.get(0).getStatus()); + } +} diff --git hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorer.java hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorer.java new file mode 100644 index 0000000..12a5d28 --- /dev/null +++ 
hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorer.java @@ -0,0 +1,658 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.pig; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Iterator; + +import org.apache.hadoop.hive.ql.CommandNeedRetryException; +import org.apache.hive.hcatalog.HcatTestUtils; +import org.apache.hive.hcatalog.mapreduce.HCatBaseTest; +import org.apache.pig.EvalFunc; +import org.apache.pig.ExecType; +import org.apache.pig.PigException; +import org.apache.pig.PigServer; +import org.apache.pig.data.DataByteArray; +import org.apache.pig.data.Tuple; +import org.apache.pig.impl.logicalLayer.FrontendException; +import org.apache.pig.impl.util.LogUtils; +import org.junit.Assert; +import org.junit.Test; + +public class TestHCatStorer extends HCatBaseTest { + + private static final String INPUT_FILE_NAME = TEST_DATA_DIR + "/input.data"; + + @Test + public void testPartColsInData() throws IOException, CommandNeedRetryException { + + driver.run("drop table junit_unparted"); + String createTable = "create table junit_unparted(a int) partitioned by (b 
string) stored as RCFILE"; + int retCode = driver.run(createTable).getResponseCode(); + if (retCode != 0) { + throw new IOException("Failed to create table."); + } + int LOOP_SIZE = 11; + String[] input = new String[LOOP_SIZE]; + for (int i = 0; i < LOOP_SIZE; i++) { + input[i] = i + "\t1"; + } + HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); + PigServer server = new PigServer(ExecType.LOCAL); + server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); + server.registerQuery("store A into 'default.junit_unparted' using " + HCatStorer.class.getName() + "('b=1');"); + server.registerQuery("B = load 'default.junit_unparted' using " + HCatLoader.class.getName() + "();"); + Iterator itr = server.openIterator("B"); + + int i = 0; + + while (itr.hasNext()) { + Tuple t = itr.next(); + Assert.assertEquals(2, t.size()); + Assert.assertEquals(t.get(0), i); + Assert.assertEquals(t.get(1), "1"); + i++; + } + + Assert.assertFalse(itr.hasNext()); + Assert.assertEquals(11, i); + } + + @Test + public void testMultiPartColsInData() throws IOException, CommandNeedRetryException { + + driver.run("drop table employee"); + String createTable = "CREATE TABLE employee (emp_id INT, emp_name STRING, emp_start_date STRING , emp_gender STRING ) " + + " PARTITIONED BY (emp_country STRING , emp_state STRING ) STORED AS RCFILE"; + + int retCode = driver.run(createTable).getResponseCode(); + if (retCode != 0) { + throw new IOException("Failed to create table."); + } + + String[] inputData = {"111237\tKrishna\t01/01/1990\tM\tIN\tTN", + "111238\tKalpana\t01/01/2000\tF\tIN\tKA", + "111239\tSatya\t01/01/2001\tM\tIN\tKL", + "111240\tKavya\t01/01/2002\tF\tIN\tAP"}; + + HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, inputData); + PigServer pig = new PigServer(ExecType.LOCAL); + pig.setBatchOn(); + pig.registerQuery("A = LOAD '" + INPUT_FILE_NAME + "' USING PigStorage() AS (emp_id:int,emp_name:chararray,emp_start_date:chararray," + + 
"emp_gender:chararray,emp_country:chararray,emp_state:chararray);"); + pig.registerQuery("TN = FILTER A BY emp_state == 'TN';"); + pig.registerQuery("KA = FILTER A BY emp_state == 'KA';"); + pig.registerQuery("KL = FILTER A BY emp_state == 'KL';"); + pig.registerQuery("AP = FILTER A BY emp_state == 'AP';"); + pig.registerQuery("STORE TN INTO 'employee' USING " + HCatStorer.class.getName() + "('emp_country=IN,emp_state=TN');"); + pig.registerQuery("STORE KA INTO 'employee' USING " + HCatStorer.class.getName() + "('emp_country=IN,emp_state=KA');"); + pig.registerQuery("STORE KL INTO 'employee' USING " + HCatStorer.class.getName() + "('emp_country=IN,emp_state=KL');"); + pig.registerQuery("STORE AP INTO 'employee' USING " + HCatStorer.class.getName() + "('emp_country=IN,emp_state=AP');"); + pig.executeBatch(); + driver.run("select * from employee"); + ArrayList results = new ArrayList(); + driver.getResults(results); + Assert.assertEquals(4, results.size()); + Collections.sort(results); + Assert.assertEquals(inputData[0], results.get(0)); + Assert.assertEquals(inputData[1], results.get(1)); + Assert.assertEquals(inputData[2], results.get(2)); + Assert.assertEquals(inputData[3], results.get(3)); + driver.run("drop table employee"); + } + + @Test + public void testStoreInPartiitonedTbl() throws IOException, CommandNeedRetryException { + + driver.run("drop table junit_unparted"); + String createTable = "create table junit_unparted(a int) partitioned by (b string) stored as RCFILE"; + int retCode = driver.run(createTable).getResponseCode(); + if (retCode != 0) { + throw new IOException("Failed to create table."); + } + int LOOP_SIZE = 11; + String[] input = new String[LOOP_SIZE]; + for (int i = 0; i < LOOP_SIZE; i++) { + input[i] = i + ""; + } + HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); + PigServer server = new PigServer(ExecType.LOCAL); + server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int);"); + server.registerQuery("store A into 
'default.junit_unparted' using " + HCatStorer.class.getName() + "('b=1');"); + server.registerQuery("B = load 'default.junit_unparted' using " + HCatLoader.class.getName() + "();"); + Iterator itr = server.openIterator("B"); + + int i = 0; + + while (itr.hasNext()) { + Tuple t = itr.next(); + Assert.assertEquals(2, t.size()); + Assert.assertEquals(t.get(0), i); + Assert.assertEquals(t.get(1), "1"); + i++; + } + + Assert.assertFalse(itr.hasNext()); + Assert.assertEquals(11, i); + } + + @Test + public void testNoAlias() throws IOException, CommandNeedRetryException { + driver.run("drop table junit_parted"); + String createTable = "create table junit_parted(a int, b string) partitioned by (ds string) stored as RCFILE"; + int retCode = driver.run(createTable).getResponseCode(); + if (retCode != 0) { + throw new IOException("Failed to create table."); + } + PigServer server = new PigServer(ExecType.LOCAL); + boolean errCaught = false; + try { + server.setBatchOn(); + server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); + server.registerQuery("B = foreach A generate a+10, b;"); + server.registerQuery("store B into 'junit_parted' using " + HCatStorer.class.getName() + "('ds=20100101');"); + server.executeBatch(); + } catch (PigException fe) { + PigException pe = LogUtils.getPigException(fe); + Assert.assertTrue(pe instanceof FrontendException); + Assert.assertEquals(PigHCatUtil.PIG_EXCEPTION_CODE, pe.getErrorCode()); + Assert.assertTrue(pe.getMessage().contains("Column name for a field is not specified. 
Please provide the full schema as an argument to HCatStorer.")); + errCaught = true; + } + Assert.assertTrue(errCaught); + errCaught = false; + try { + server.setBatchOn(); + server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, B:chararray);"); + server.registerQuery("B = foreach A generate a, B;"); + server.registerQuery("store B into 'junit_parted' using " + HCatStorer.class.getName() + "('ds=20100101');"); + server.executeBatch(); + } catch (PigException fe) { + PigException pe = LogUtils.getPigException(fe); + Assert.assertTrue(pe instanceof FrontendException); + Assert.assertEquals(PigHCatUtil.PIG_EXCEPTION_CODE, pe.getErrorCode()); + Assert.assertTrue(pe.getMessage().contains("Column names should all be in lowercase. Invalid name found: B")); + errCaught = true; + } + driver.run("drop table junit_parted"); + Assert.assertTrue(errCaught); + } + + @Test + public void testStoreMultiTables() throws IOException, CommandNeedRetryException { + + driver.run("drop table junit_unparted"); + String createTable = "create table junit_unparted(a int, b string) stored as RCFILE"; + int retCode = driver.run(createTable).getResponseCode(); + if (retCode != 0) { + throw new IOException("Failed to create table."); + } + driver.run("drop table junit_unparted2"); + createTable = "create table junit_unparted2(a int, b string) stored as RCFILE"; + retCode = driver.run(createTable).getResponseCode(); + if (retCode != 0) { + throw new IOException("Failed to create table."); + } + + int LOOP_SIZE = 3; + String[] input = new String[LOOP_SIZE * LOOP_SIZE]; + int k = 0; + for (int i = 1; i <= LOOP_SIZE; i++) { + String si = i + ""; + for (int j = 1; j <= LOOP_SIZE; j++) { + input[k++] = si + "\t" + j; + } + } + HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); + PigServer server = new PigServer(ExecType.LOCAL); + server.setBatchOn(); + server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); + server.registerQuery("B = filter A by a < 
2;"); + server.registerQuery("store B into 'junit_unparted' using " + HCatStorer.class.getName() + "();"); + server.registerQuery("C = filter A by a >= 2;"); + server.registerQuery("store C into 'junit_unparted2' using " + HCatStorer.class.getName() + "();"); + server.executeBatch(); + + driver.run("select * from junit_unparted"); + ArrayList res = new ArrayList(); + driver.getResults(res); + driver.run("select * from junit_unparted2"); + ArrayList res2 = new ArrayList(); + driver.getResults(res2); + + res.addAll(res2); + driver.run("drop table junit_unparted"); + driver.run("drop table junit_unparted2"); + + Iterator itr = res.iterator(); + for (int i = 0; i < LOOP_SIZE * LOOP_SIZE; i++) { + Assert.assertEquals(input[i], itr.next()); + } + + Assert.assertFalse(itr.hasNext()); + + } + + @Test + public void testStoreWithNoSchema() throws IOException, CommandNeedRetryException { + + driver.run("drop table junit_unparted"); + String createTable = "create table junit_unparted(a int, b string) stored as RCFILE"; + int retCode = driver.run(createTable).getResponseCode(); + if (retCode != 0) { + throw new IOException("Failed to create table."); + } + + int LOOP_SIZE = 3; + String[] input = new String[LOOP_SIZE * LOOP_SIZE]; + int k = 0; + for (int i = 1; i <= LOOP_SIZE; i++) { + String si = i + ""; + for (int j = 1; j <= LOOP_SIZE; j++) { + input[k++] = si + "\t" + j; + } + } + HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); + PigServer server = new PigServer(ExecType.LOCAL); + server.setBatchOn(); + server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); + server.registerQuery("store A into 'default.junit_unparted' using " + HCatStorer.class.getName() + "('');"); + server.executeBatch(); + + driver.run("select * from junit_unparted"); + ArrayList res = new ArrayList(); + driver.getResults(res); + driver.run("drop table junit_unparted"); + Iterator itr = res.iterator(); + for (int i = 0; i < LOOP_SIZE * LOOP_SIZE; i++) { + 
Assert.assertEquals(input[i], itr.next()); + } + + Assert.assertFalse(itr.hasNext()); + + } + + @Test + public void testStoreWithNoCtorArgs() throws IOException, CommandNeedRetryException { + + driver.run("drop table junit_unparted"); + String createTable = "create table junit_unparted(a int, b string) stored as RCFILE"; + int retCode = driver.run(createTable).getResponseCode(); + if (retCode != 0) { + throw new IOException("Failed to create table."); + } + + int LOOP_SIZE = 3; + String[] input = new String[LOOP_SIZE * LOOP_SIZE]; + int k = 0; + for (int i = 1; i <= LOOP_SIZE; i++) { + String si = i + ""; + for (int j = 1; j <= LOOP_SIZE; j++) { + input[k++] = si + "\t" + j; + } + } + HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); + PigServer server = new PigServer(ExecType.LOCAL); + server.setBatchOn(); + server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); + server.registerQuery("store A into 'junit_unparted' using " + HCatStorer.class.getName() + "();"); + server.executeBatch(); + + driver.run("select * from junit_unparted"); + ArrayList res = new ArrayList(); + driver.getResults(res); + driver.run("drop table junit_unparted"); + Iterator itr = res.iterator(); + for (int i = 0; i < LOOP_SIZE * LOOP_SIZE; i++) { + Assert.assertEquals(input[i], itr.next()); + } + + Assert.assertFalse(itr.hasNext()); + + } + + @Test + public void testEmptyStore() throws IOException, CommandNeedRetryException { + + driver.run("drop table junit_unparted"); + String createTable = "create table junit_unparted(a int, b string) stored as RCFILE"; + int retCode = driver.run(createTable).getResponseCode(); + if (retCode != 0) { + throw new IOException("Failed to create table."); + } + + int LOOP_SIZE = 3; + String[] input = new String[LOOP_SIZE * LOOP_SIZE]; + int k = 0; + for (int i = 1; i <= LOOP_SIZE; i++) { + String si = i + ""; + for (int j = 1; j <= LOOP_SIZE; j++) { + input[k++] = si + "\t" + j; + } + } + 
HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); + PigServer server = new PigServer(ExecType.LOCAL); + server.setBatchOn(); + server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); + server.registerQuery("B = filter A by a > 100;"); + server.registerQuery("store B into 'default.junit_unparted' using " + HCatStorer.class.getName() + "('','a:int,b:chararray');"); + server.executeBatch(); + + driver.run("select * from junit_unparted"); + ArrayList res = new ArrayList(); + driver.getResults(res); + driver.run("drop table junit_unparted"); + Iterator itr = res.iterator(); + Assert.assertFalse(itr.hasNext()); + + } + + @Test + public void testBagNStruct() throws IOException, CommandNeedRetryException { + driver.run("drop table junit_unparted"); + String createTable = "create table junit_unparted(b string,a struct, arr_of_struct array, " + + "arr_of_struct2 array>, arr_of_struct3 array>) stored as RCFILE"; + int retCode = driver.run(createTable).getResponseCode(); + if (retCode != 0) { + throw new IOException("Failed to create table."); + } + + String[] inputData = new String[]{"zookeeper\t(2)\t{(pig)}\t{(pnuts,hdfs)}\t{(hadoop),(hcat)}", + "chubby\t(2)\t{(sawzall)}\t{(bigtable,gfs)}\t{(mapreduce),(hcat)}"}; + + HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, inputData); + + PigServer server = new PigServer(ExecType.LOCAL); + server.setBatchOn(); + server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (b:chararray, a:tuple(a1:int), arr_of_struct:bag{mytup:tuple(s1:chararray)}, arr_of_struct2:bag{mytup:tuple(s1:chararray,s2:chararray)}, arr_of_struct3:bag{t3:tuple(s3:chararray)});"); + server.registerQuery("store A into 'default.junit_unparted' using " + HCatStorer.class.getName() + "('','b:chararray, a:tuple(a1:int)," + + " arr_of_struct:bag{mytup:tuple(s1:chararray)}, arr_of_struct2:bag{mytup:tuple(s1:chararray,s2:chararray)}, arr_of_struct3:bag{t3:tuple(s3:chararray)}');"); + server.executeBatch(); + + driver.run("select * 
from junit_unparted"); + ArrayList res = new ArrayList(); + driver.getResults(res); + driver.run("drop table junit_unparted"); + Iterator itr = res.iterator(); + Assert.assertEquals("zookeeper\t{\"a1\":2}\t[\"pig\"]\t[{\"s1\":\"pnuts\",\"s2\":\"hdfs\"}]\t[{\"s3\":\"hadoop\"},{\"s3\":\"hcat\"}]", itr.next()); + Assert.assertEquals("chubby\t{\"a1\":2}\t[\"sawzall\"]\t[{\"s1\":\"bigtable\",\"s2\":\"gfs\"}]\t[{\"s3\":\"mapreduce\"},{\"s3\":\"hcat\"}]", itr.next()); + Assert.assertFalse(itr.hasNext()); + + } + + @Test + public void testStoreFuncAllSimpleTypes() throws IOException, CommandNeedRetryException { + + driver.run("drop table junit_unparted"); + String createTable = "create table junit_unparted(a int, b float, c double, d bigint, e string, h boolean, f binary, g binary) stored as RCFILE"; + int retCode = driver.run(createTable).getResponseCode(); + if (retCode != 0) { + throw new IOException("Failed to create table."); + } + + int i = 0; + String[] input = new String[3]; + input[i++] = "0\t\t\t\t\t\t\t"; //Empty values except first column + input[i++] = "\t" + i * 2.1f + "\t" + i * 1.1d + "\t" + i * 2L + "\t" + "lets hcat" + "\t" + "true" + "\tbinary-data"; //First column empty + input[i++] = i + "\t" + i * 2.1f + "\t" + i * 1.1d + "\t" + i * 2L + "\t" + "lets hcat" + "\t" + "false" + "\tbinary-data"; + + HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); + PigServer server = new PigServer(ExecType.LOCAL); + server.setBatchOn(); + server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:float, c:double, d:long, e:chararray, h:boolean, f:bytearray);"); + //null gets stored into column g which is a binary field. 
+ server.registerQuery("store A into 'default.junit_unparted' using " + HCatStorer.class.getName() + "('','a:int, b:float, c:double, d:long, e:chararray, h:boolean, f:bytearray');"); + server.executeBatch(); + + + driver.run("select * from junit_unparted"); + ArrayList res = new ArrayList(); + driver.getResults(res); + + Iterator itr = res.iterator(); + String next = itr.next(); + Assert.assertEquals("0\tNULL\tNULL\tNULL\tNULL\tNULL\tNULL\tNULL", next ); + Assert.assertEquals("NULL\t4.2\t2.2\t4\tlets hcat\ttrue\tbinary-data\tNULL", itr.next()); + Assert.assertEquals("3\t6.2999997\t3.3000000000000003\t6\tlets hcat\tfalse\tbinary-data\tNULL", itr.next()); + Assert.assertFalse(itr.hasNext()); + + server.registerQuery("B = load 'junit_unparted' using " + HCatLoader.class.getName() + ";"); + Iterator iter = server.openIterator("B"); + int count = 0; + int num5nulls = 0; + while (iter.hasNext()) { + Tuple t = iter.next(); + if (t.get(6) == null) { + num5nulls++; + } else { + Assert.assertTrue(t.get(6) instanceof DataByteArray); + } + Assert.assertNull(t.get(7)); + count++; + } + Assert.assertEquals(3, count); + Assert.assertEquals(1, num5nulls); + driver.run("drop table junit_unparted"); + } + + @Test + public void testStoreFuncSimple() throws IOException, CommandNeedRetryException { + + driver.run("drop table junit_unparted"); + String createTable = "create table junit_unparted(a int, b string) stored as RCFILE"; + int retCode = driver.run(createTable).getResponseCode(); + if (retCode != 0) { + throw new IOException("Failed to create table."); + } + + int LOOP_SIZE = 3; + String[] inputData = new String[LOOP_SIZE * LOOP_SIZE]; + int k = 0; + for (int i = 1; i <= LOOP_SIZE; i++) { + String si = i + ""; + for (int j = 1; j <= LOOP_SIZE; j++) { + inputData[k++] = si + "\t" + j; + } + } + HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, inputData); + PigServer server = new PigServer(ExecType.LOCAL); + server.setBatchOn(); + server.registerQuery("A = load '" + 
INPUT_FILE_NAME + "' as (a:int, b:chararray);"); + server.registerQuery("store A into 'default.junit_unparted' using " + HCatStorer.class.getName() + "('','a:int,b:chararray');"); + server.executeBatch(); + + driver.run("select * from junit_unparted"); + ArrayList res = new ArrayList(); + driver.getResults(res); + driver.run("drop table junit_unparted"); + Iterator itr = res.iterator(); + for (int i = 1; i <= LOOP_SIZE; i++) { + String si = i + ""; + for (int j = 1; j <= LOOP_SIZE; j++) { + Assert.assertEquals(si + "\t" + j, itr.next()); + } + } + Assert.assertFalse(itr.hasNext()); + + } + + @Test + public void testDynamicPartitioningMultiPartColsInDataPartialSpec() throws IOException, CommandNeedRetryException { + + driver.run("drop table if exists employee"); + String createTable = "CREATE TABLE employee (emp_id INT, emp_name STRING, emp_start_date STRING , emp_gender STRING ) " + + " PARTITIONED BY (emp_country STRING , emp_state STRING ) STORED AS RCFILE"; + + int retCode = driver.run(createTable).getResponseCode(); + if (retCode != 0) { + throw new IOException("Failed to create table."); + } + + String[] inputData = {"111237\tKrishna\t01/01/1990\tM\tIN\tTN", + "111238\tKalpana\t01/01/2000\tF\tIN\tKA", + "111239\tSatya\t01/01/2001\tM\tIN\tKL", + "111240\tKavya\t01/01/2002\tF\tIN\tAP"}; + + HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, inputData); + PigServer pig = new PigServer(ExecType.LOCAL); + pig.setBatchOn(); + pig.registerQuery("A = LOAD '" + INPUT_FILE_NAME + "' USING PigStorage() AS (emp_id:int,emp_name:chararray,emp_start_date:chararray," + + "emp_gender:chararray,emp_country:chararray,emp_state:chararray);"); + pig.registerQuery("IN = FILTER A BY emp_country == 'IN';"); + pig.registerQuery("STORE IN INTO 'employee' USING " + HCatStorer.class.getName() + "('emp_country=IN');"); + pig.executeBatch(); + driver.run("select * from employee"); + ArrayList results = new ArrayList(); + driver.getResults(results); + Assert.assertEquals(4, results.size()); 
+ Collections.sort(results); + Assert.assertEquals(inputData[0], results.get(0)); + Assert.assertEquals(inputData[1], results.get(1)); + Assert.assertEquals(inputData[2], results.get(2)); + Assert.assertEquals(inputData[3], results.get(3)); + driver.run("drop table employee"); + } + + @Test + public void testDynamicPartitioningMultiPartColsInDataNoSpec() throws IOException, CommandNeedRetryException { + + driver.run("drop table if exists employee"); + String createTable = "CREATE TABLE employee (emp_id INT, emp_name STRING, emp_start_date STRING , emp_gender STRING ) " + + " PARTITIONED BY (emp_country STRING , emp_state STRING ) STORED AS RCFILE"; + + int retCode = driver.run(createTable).getResponseCode(); + if (retCode != 0) { + throw new IOException("Failed to create table."); + } + + String[] inputData = {"111237\tKrishna\t01/01/1990\tM\tIN\tTN", + "111238\tKalpana\t01/01/2000\tF\tIN\tKA", + "111239\tSatya\t01/01/2001\tM\tIN\tKL", + "111240\tKavya\t01/01/2002\tF\tIN\tAP"}; + + HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, inputData); + PigServer pig = new PigServer(ExecType.LOCAL); + pig.setBatchOn(); + pig.registerQuery("A = LOAD '" + INPUT_FILE_NAME + "' USING PigStorage() AS (emp_id:int,emp_name:chararray,emp_start_date:chararray," + + "emp_gender:chararray,emp_country:chararray,emp_state:chararray);"); + pig.registerQuery("IN = FILTER A BY emp_country == 'IN';"); + pig.registerQuery("STORE IN INTO 'employee' USING " + HCatStorer.class.getName() + "();"); + pig.executeBatch(); + driver.run("select * from employee"); + ArrayList results = new ArrayList(); + driver.getResults(results); + Assert.assertEquals(4, results.size()); + Collections.sort(results); + Assert.assertEquals(inputData[0], results.get(0)); + Assert.assertEquals(inputData[1], results.get(1)); + Assert.assertEquals(inputData[2], results.get(2)); + Assert.assertEquals(inputData[3], results.get(3)); + driver.run("drop table employee"); + } + + @Test + public void 
testDynamicPartitioningMultiPartColsNoDataInDataNoSpec() throws IOException, CommandNeedRetryException { + + driver.run("drop table if exists employee"); + String createTable = "CREATE TABLE employee (emp_id INT, emp_name STRING, emp_start_date STRING , emp_gender STRING ) " + + " PARTITIONED BY (emp_country STRING , emp_state STRING ) STORED AS RCFILE"; + + int retCode = driver.run(createTable).getResponseCode(); + if (retCode != 0) { + throw new IOException("Failed to create table."); + } + + String[] inputData = {}; + HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, inputData); + + PigServer pig = new PigServer(ExecType.LOCAL); + pig.setBatchOn(); + pig.registerQuery("A = LOAD '" + INPUT_FILE_NAME + "' USING PigStorage() AS (emp_id:int,emp_name:chararray,emp_start_date:chararray," + + "emp_gender:chararray,emp_country:chararray,emp_state:chararray);"); + pig.registerQuery("IN = FILTER A BY emp_country == 'IN';"); + pig.registerQuery("STORE IN INTO 'employee' USING " + HCatStorer.class.getName() + "();"); + pig.executeBatch(); + driver.run("select * from employee"); + ArrayList results = new ArrayList(); + driver.getResults(results); + Assert.assertEquals(0, results.size()); + driver.run("drop table employee"); + } + + public void testPartitionPublish() + throws IOException, CommandNeedRetryException { + + driver.run("drop table ptn_fail"); + String createTable = "create table ptn_fail(a int, c string) partitioned by (b string) stored as RCFILE"; + int retCode = driver.run(createTable).getResponseCode(); + if (retCode != 0) { + throw new IOException("Failed to create table."); + } + int LOOP_SIZE = 11; + String[] input = new String[LOOP_SIZE]; + + for (int i = 0; i < LOOP_SIZE; i++) { + input[i] = i + "\tmath"; + } + HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, input); + PigServer server = new PigServer(ExecType.LOCAL); + server.setBatchOn(); + server.registerQuery("A = load '" + INPUT_FILE_NAME + + "' as (a:int, c:chararray);"); + server.registerQuery("B = 
filter A by " + FailEvalFunc.class.getName() + + "($0);"); + server.registerQuery("store B into 'ptn_fail' using " + + HCatStorer.class.getName() + "('b=math');"); + server.executeBatch(); + + String query = "show partitions ptn_fail"; + retCode = driver.run(query).getResponseCode(); + + if (retCode != 0) { + throw new IOException("Error " + retCode + " running query " + + query); + } + + ArrayList res = new ArrayList(); + driver.getResults(res); + Assert.assertEquals(0, res.size()); + + // Make sure the partitions directory is not in hdfs. + Assert.assertTrue((new File(TEST_WAREHOUSE_DIR + "/ptn_fail")).exists()); + Assert.assertFalse((new File(TEST_WAREHOUSE_DIR + "/ptn_fail/b=math")) + .exists()); + } + + static public class FailEvalFunc extends EvalFunc { + + /* + * @param Tuple /* @return null /* @throws IOException + * + * @see org.apache.pig.EvalFunc#exec(org.apache.pig.data.Tuple) + */ + @Override + public Boolean exec(Tuple tuple) throws IOException { + throw new IOException("Eval Func to mimic Failure."); + } + + } +} diff --git hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorerMulti.java hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorerMulti.java new file mode 100644 index 0000000..e870a98 --- /dev/null +++ hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorerMulti.java @@ -0,0 +1,201 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.pig; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Map; + +import junit.framework.TestCase; + +import org.apache.hadoop.fs.FileUtil; +import org.apache.hadoop.hive.cli.CliSessionState; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.CommandNeedRetryException; +import org.apache.hadoop.hive.ql.Driver; +import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.hive.hcatalog.data.Pair; +import org.apache.pig.ExecType; +import org.apache.pig.PigServer; + +public class TestHCatStorerMulti extends TestCase { + private static final String TEST_DATA_DIR = + "/tmp/build/test/data/" + TestHCatStorerMulti.class.getCanonicalName(); + private static final String TEST_WAREHOUSE_DIR = TEST_DATA_DIR + "/warehouse"; + private static final String INPUT_FILE_NAME = TEST_DATA_DIR + "/input.data"; + + private static final String BASIC_TABLE = "junit_unparted_basic"; + private static final String PARTITIONED_TABLE = "junit_parted_basic"; + private static Driver driver; + + private static Map> basicInputData; + + protected String storageFormat() { + return "RCFILE tblproperties('hcat.isd'='org.apache.hive.hcatalog.rcfile.RCFileInputDriver'," + + "'hcat.osd'='org.apache.hive.hcatalog.rcfile.RCFileOutputDriver')"; + } + + private void dropTable(String tablename) throws IOException, CommandNeedRetryException { + driver.run("drop table " + tablename); + } + + private void 
createTable(String tablename, String schema, String partitionedBy) throws IOException, CommandNeedRetryException { + String createTable; + createTable = "create table " + tablename + "(" + schema + ") "; + if ((partitionedBy != null) && (!partitionedBy.trim().isEmpty())) { + createTable = createTable + "partitioned by (" + partitionedBy + ") "; + } + createTable = createTable + "stored as " + storageFormat(); + int retCode = driver.run(createTable).getResponseCode(); + if (retCode != 0) { + throw new IOException("Failed to create table. [" + createTable + "], return code from hive driver : [" + retCode + "]"); + } + } + + private void createTable(String tablename, String schema) throws IOException, CommandNeedRetryException { + createTable(tablename, schema, null); + } + + @Override + protected void setUp() throws Exception { + if (driver == null) { + HiveConf hiveConf = new HiveConf(this.getClass()); + hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); + hiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, TEST_WAREHOUSE_DIR); + driver = new Driver(hiveConf); + SessionState.start(new CliSessionState(hiveConf)); + } + + cleanup(); + } + + @Override + protected void tearDown() throws Exception { + cleanup(); + } + + public void testStoreBasicTable() throws Exception { + + + createTable(BASIC_TABLE, "a int, b string"); + + populateBasicFile(); + + PigServer server = new PigServer(ExecType.LOCAL); + server.setBatchOn(); + server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); + server.registerQuery("store A into '" + BASIC_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer();"); + + server.executeBatch(); + + driver.run("select * from " + BASIC_TABLE); + ArrayList unpartitionedTableValuesReadFromHiveDriver = new ArrayList(); + 
driver.getResults(unpartitionedTableValuesReadFromHiveDriver); + assertEquals(basicInputData.size(), unpartitionedTableValuesReadFromHiveDriver.size()); + } + + public void testStorePartitionedTable() throws Exception { + createTable(PARTITIONED_TABLE, "a int, b string", "bkt string"); + + populateBasicFile(); + + PigServer server = new PigServer(ExecType.LOCAL); + server.setBatchOn(); + server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); + + server.registerQuery("B2 = filter A by a < 2;"); + server.registerQuery("store B2 into '" + PARTITIONED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer('bkt=0');"); + server.registerQuery("C2 = filter A by a >= 2;"); + server.registerQuery("store C2 into '" + PARTITIONED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer('bkt=1');"); + + server.executeBatch(); + + driver.run("select * from " + PARTITIONED_TABLE); + ArrayList partitionedTableValuesReadFromHiveDriver = new ArrayList(); + driver.getResults(partitionedTableValuesReadFromHiveDriver); + assertEquals(basicInputData.size(), partitionedTableValuesReadFromHiveDriver.size()); + } + + public void testStoreTableMulti() throws Exception { + + + createTable(BASIC_TABLE, "a int, b string"); + createTable(PARTITIONED_TABLE, "a int, b string", "bkt string"); + + populateBasicFile(); + + PigServer server = new PigServer(ExecType.LOCAL); + server.setBatchOn(); + server.registerQuery("A = load '" + INPUT_FILE_NAME + "' as (a:int, b:chararray);"); + server.registerQuery("store A into '" + BASIC_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer();"); + + server.registerQuery("B2 = filter A by a < 2;"); + server.registerQuery("store B2 into '" + PARTITIONED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer('bkt=0');"); + server.registerQuery("C2 = filter A by a >= 2;"); + server.registerQuery("store C2 into '" + PARTITIONED_TABLE + "' using org.apache.hive.hcatalog.pig.HCatStorer('bkt=1');"); + + server.executeBatch(); + + 
driver.run("select * from " + BASIC_TABLE); + ArrayList unpartitionedTableValuesReadFromHiveDriver = new ArrayList(); + driver.getResults(unpartitionedTableValuesReadFromHiveDriver); + driver.run("select * from " + PARTITIONED_TABLE); + ArrayList partitionedTableValuesReadFromHiveDriver = new ArrayList(); + driver.getResults(partitionedTableValuesReadFromHiveDriver); + assertEquals(basicInputData.size(), unpartitionedTableValuesReadFromHiveDriver.size()); + assertEquals(basicInputData.size(), partitionedTableValuesReadFromHiveDriver.size()); + } + + private void populateBasicFile() throws IOException { + int LOOP_SIZE = 3; + String[] input = new String[LOOP_SIZE * LOOP_SIZE]; + basicInputData = new HashMap>(); + int k = 0; + File file = new File(INPUT_FILE_NAME); + file.deleteOnExit(); + FileWriter writer = new FileWriter(file); + for (int i = 1; i <= LOOP_SIZE; i++) { + String si = i + ""; + for (int j = 1; j <= LOOP_SIZE; j++) { + String sj = "S" + j + "S"; + input[k] = si + "\t" + sj; + basicInputData.put(k, new Pair(i, sj)); + writer.write(input[k] + "\n"); + k++; + } + } + writer.close(); + } + + private void cleanup() throws IOException, CommandNeedRetryException { + File f = new File(TEST_WAREHOUSE_DIR); + if (f.exists()) { + FileUtil.fullyDelete(f); + } + new File(TEST_WAREHOUSE_DIR).mkdirs(); + + dropTable(BASIC_TABLE); + dropTable(PARTITIONED_TABLE); + } +} diff --git hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorerWrapper.java hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorerWrapper.java new file mode 100644 index 0000000..e10f2c8 --- /dev/null +++ hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorerWrapper.java @@ -0,0 +1,92 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.pig; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.UUID; + +import org.apache.hadoop.hive.ql.CommandNeedRetryException; +import org.apache.hive.hcatalog.HcatTestUtils; +import org.apache.hive.hcatalog.mapreduce.HCatBaseTest; +import org.apache.pig.ExecType; +import org.apache.pig.PigServer; +import org.junit.Assert; +import org.junit.Test; + +/** + * This test checks the {@link HCatConstants#HCAT_PIG_STORER_EXTERNAL_LOCATION} that we can set in the + * UDFContext of {@link HCatStorer} so that it writes to the specified external location. + * + * Since {@link HCatStorer} does not allow extra parameters in the constructor, we use {@link HCatStorerWrapper} + * that always treats the last parameter as the external path. 
+ */ +public class TestHCatStorerWrapper extends HCatBaseTest { + + private static final String INPUT_FILE_NAME = TEST_DATA_DIR + "/input.data"; + + @Test + public void testStoreExternalTableWithExternalDir() throws IOException, CommandNeedRetryException{ + + File tmpExternalDir = new File(TEST_DATA_DIR, UUID.randomUUID().toString()); + tmpExternalDir.deleteOnExit(); + + String part_val = "100"; + + driver.run("drop table junit_external"); + String createTable = "create external table junit_external(a int, b string) partitioned by (c string) stored as RCFILE"; + Assert.assertEquals(0, driver.run(createTable).getResponseCode()); + + int LOOP_SIZE = 3; + String[] inputData = new String[LOOP_SIZE*LOOP_SIZE]; + int k = 0; + for(int i = 1; i <= LOOP_SIZE; i++) { + String si = i + ""; + for(int j=1;j<=LOOP_SIZE;j++) { + inputData[k++] = si + "\t"+j; + } + } + HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, inputData); + PigServer server = new PigServer(ExecType.LOCAL); + server.setBatchOn(); + logAndRegister(server, "A = load '"+INPUT_FILE_NAME+"' as (a:int, b:chararray);"); + logAndRegister(server, "store A into 'default.junit_external' using " + HCatStorerWrapper.class.getName() + + "('c=" + part_val + "','" + tmpExternalDir.getPath().replaceAll("\\\\", "/") + "');"); + server.executeBatch(); + + Assert.assertTrue(tmpExternalDir.exists()); + Assert.assertTrue(new File(tmpExternalDir.getPath().replaceAll("\\\\", "/") + "/" + "part-m-00000").exists()); + + driver.run("select * from junit_external"); + ArrayList res = new ArrayList(); + driver.getResults(res); + driver.run("drop table junit_external"); + Iterator itr = res.iterator(); + for(int i = 1; i <= LOOP_SIZE; i++) { + String si = i + ""; + for(int j=1;j<=LOOP_SIZE;j++) { + Assert.assertEquals( si + "\t" + j + "\t" + part_val,itr.next()); + } + } + Assert.assertFalse(itr.hasNext()); + + } +} diff --git hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestOrcHCatLoader.java 
hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestOrcHCatLoader.java new file mode 100644 index 0000000..8a48e4c --- /dev/null +++ hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestOrcHCatLoader.java @@ -0,0 +1,29 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.pig; + +public class TestOrcHCatLoader extends TestHCatLoader { + + @Override + protected String storageFormat() { + return "orc"; + } + +} + diff --git hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestOrcHCatLoaderComplexSchema.java hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestOrcHCatLoaderComplexSchema.java new file mode 100644 index 0000000..a8748d0 --- /dev/null +++ hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestOrcHCatLoaderComplexSchema.java @@ -0,0 +1,28 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.pig; + +public class TestOrcHCatLoaderComplexSchema extends TestHCatLoaderComplexSchema { + + @Override + protected String storageFormat() { + return "orc"; + } + +} diff --git hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestOrcHCatStorer.java hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestOrcHCatStorer.java new file mode 100644 index 0000000..ea12d42 --- /dev/null +++ hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestOrcHCatStorer.java @@ -0,0 +1,28 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.hive.hcatalog.pig; + +public class TestOrcHCatStorer extends TestHCatStorerMulti { + + @Override + protected String storageFormat() { + return "orc"; + } +} + diff --git hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestPermsInheritance.java.broken hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestPermsInheritance.java.broken new file mode 100644 index 0000000..fce1e70 --- /dev/null +++ hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestPermsInheritance.java.broken @@ -0,0 +1,135 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +-->There are two pieces of code that sets directory permissions. +-->One that sets the UMask which only woks for dfs filesystem. +-->And the other change the permission of directories after they are created. +-->I removed that since it is not secure and just add more load on the namenode. +-->We should push this test to e2e to verify what actually runs in production. 
+ +package org.apache.hcatalog.pig; + +import java.io.IOException; + +import junit.framework.TestCase; + +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathFilter; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; +import org.apache.hadoop.hive.metastore.MetaStoreUtils; +import org.apache.hadoop.hive.metastore.Warehouse; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; +import org.apache.hadoop.hive.metastore.api.UnknownTableException; +import org.apache.hadoop.hive.ql.metadata.Hive; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hcatalog.ExitException; +import org.apache.hcatalog.NoExitSecurityManager; +import org.apache.hcatalog.cli.HCatCli; +import org.apache.hcatalog.pig.HCatStorer; +import org.apache.pig.ExecType; +import org.apache.pig.PigServer; +import org.apache.pig.impl.util.UDFContext; +import org.apache.thrift.TException; + +public class TestPermsInheritance extends TestCase { + + @Override + protected void setUp() throws Exception { + super.setUp(); + securityManager = System.getSecurityManager(); + System.setSecurityManager(new NoExitSecurityManager()); + msc = new HiveMetaStoreClient(conf); + msc.dropTable(MetaStoreUtils.DEFAULT_DATABASE_NAME,"testNoPartTbl", true,true); + System.setProperty(HiveConf.ConfVars.PREEXECHOOKS.varname, " "); + System.setProperty(HiveConf.ConfVars.POSTEXECHOOKS.varname, " "); + msc.dropTable(MetaStoreUtils.DEFAULT_DATABASE_NAME,"testPartTbl", true,true); + pig = new PigServer(ExecType.LOCAL, conf.getAllProperties()); + UDFContext.getUDFContext().setClientSystemProps(); + } + + private HiveMetaStoreClient msc; + private SecurityManager securityManager; + private PigServer pig; + + @Override + protected void 
tearDown() throws Exception { + super.tearDown(); + System.setSecurityManager(securityManager); + } + + private final HiveConf conf = new HiveConf(this.getClass()); + + public void testNoPartTbl() throws IOException, MetaException, UnknownTableException, TException, NoSuchObjectException, HiveException{ + + try{ + HCatCli.main(new String[]{"-e","create table testNoPartTbl (line string) stored as RCFILE", "-p","rwx-wx---"}); + } + catch(Exception e){ + assertTrue(e instanceof ExitException); + assertEquals(((ExitException)e).getStatus(), 0); + } + Warehouse wh = new Warehouse(conf); + Path dfsPath = wh.getTablePath(Hive.get(conf).getDatabase(MetaStoreUtils.DEFAULT_DATABASE_NAME), "testNoPartTbl"); + FileSystem fs = dfsPath.getFileSystem(conf); + assertEquals(fs.getFileStatus(dfsPath).getPermission(),FsPermission.valueOf("drwx-wx---")); + + pig.setBatchOn(); + pig.registerQuery("A = load 'build.xml' as (line:chararray);"); + pig.registerQuery("store A into 'testNoPartTbl' using "+HCatStorer.class.getName()+"();"); + pig.executeBatch(); + FileStatus[] status = fs.listStatus(dfsPath,hiddenFileFilter); + + assertEquals(status.length, 1); + assertEquals(FsPermission.valueOf("drwx-wx---"),status[0].getPermission()); + + try{ + HCatCli.main(new String[]{"-e","create table testPartTbl (line string) partitioned by (a string) stored as RCFILE", "-p","rwx-wx--x"}); + } + catch(Exception e){ + assertTrue(e instanceof ExitException); + assertEquals(((ExitException)e).getStatus(), 0); + } + + dfsPath = wh.getTablePath(Hive.get(conf).getDatabase(MetaStoreUtils.DEFAULT_DATABASE_NAME), "testPartTbl"); + assertEquals(fs.getFileStatus(dfsPath).getPermission(),FsPermission.valueOf("drwx-wx--x")); + + pig.setBatchOn(); + pig.registerQuery("A = load 'build.xml' as (line:chararray);"); + pig.registerQuery("store A into 'testPartTbl' using "+HCatStorer.class.getName()+"('a=part');"); + pig.executeBatch(); + + Path partPath = new Path(dfsPath,"a=part"); + 
assertEquals(FsPermission.valueOf("drwx-wx--x"),fs.getFileStatus(partPath).getPermission()); + status = fs.listStatus(partPath,hiddenFileFilter); + assertEquals(status.length, 1); + assertEquals(FsPermission.valueOf("drwx-wx--x"),status[0].getPermission()); + } + + private static final PathFilter hiddenFileFilter = new PathFilter(){ + public boolean accept(Path p){ + String name = p.getName(); + return !name.startsWith("_") && !name.startsWith("."); + } + }; +} diff --git hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestPigHCatUtil.java hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestPigHCatUtil.java new file mode 100644 index 0000000..83dbbc0 --- /dev/null +++ hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestPigHCatUtil.java @@ -0,0 +1,95 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hive.hcatalog.pig; + +import com.google.common.collect.Lists; +import junit.framework.Assert; +import org.apache.hive.hcatalog.common.HCatConstants; +import org.apache.hive.hcatalog.data.schema.HCatFieldSchema; +import org.apache.hive.hcatalog.data.schema.HCatSchema; +import org.apache.pig.ResourceSchema; +import org.apache.pig.ResourceSchema.ResourceFieldSchema; +import org.apache.pig.data.DataType; +import org.apache.pig.impl.util.UDFContext; +import org.junit.Test; + +public class TestPigHCatUtil { + + @Test + public void testGetBagSubSchema() throws Exception { + + // Define the expected schema. + ResourceFieldSchema[] bagSubFieldSchemas = new ResourceFieldSchema[1]; + bagSubFieldSchemas[0] = new ResourceFieldSchema().setName("innertuple") + .setDescription("The tuple in the bag").setType(DataType.TUPLE); + + ResourceFieldSchema[] innerTupleFieldSchemas = new ResourceFieldSchema[1]; + innerTupleFieldSchemas[0] = + new ResourceFieldSchema().setName("innerfield").setType(DataType.CHARARRAY); + + bagSubFieldSchemas[0].setSchema(new ResourceSchema().setFields(innerTupleFieldSchemas)); + ResourceSchema expected = new ResourceSchema().setFields(bagSubFieldSchemas); + + // Get the actual converted schema. + HCatSchema hCatSchema = new HCatSchema(Lists.newArrayList( + new HCatFieldSchema("innerLlama", HCatFieldSchema.Type.STRING, null))); + HCatFieldSchema hCatFieldSchema = + new HCatFieldSchema("llama", HCatFieldSchema.Type.ARRAY, hCatSchema, null); + ResourceSchema actual = PigHCatUtil.getBagSubSchema(hCatFieldSchema); + + Assert.assertEquals(expected.toString(), actual.toString()); + } + + @Test + public void testGetBagSubSchemaConfigured() throws Exception { + + // NOTE: pig-0.8 sets client system properties by actually getting the client + // system properties. Starting in pig-0.9 you must pass the properties in. + // When updating our pig dependency this will need updated. 
+ System.setProperty(HCatConstants.HCAT_PIG_INNER_TUPLE_NAME, "t"); + System.setProperty(HCatConstants.HCAT_PIG_INNER_FIELD_NAME, "FIELDNAME_tuple"); + UDFContext.getUDFContext().setClientSystemProps(System.getProperties()); + + // Define the expected schema. + ResourceFieldSchema[] bagSubFieldSchemas = new ResourceFieldSchema[1]; + bagSubFieldSchemas[0] = new ResourceFieldSchema().setName("t") + .setDescription("The tuple in the bag").setType(DataType.TUPLE); + + ResourceFieldSchema[] innerTupleFieldSchemas = new ResourceFieldSchema[1]; + innerTupleFieldSchemas[0] = + new ResourceFieldSchema().setName("llama_tuple").setType(DataType.CHARARRAY); + + bagSubFieldSchemas[0].setSchema(new ResourceSchema().setFields(innerTupleFieldSchemas)); + ResourceSchema expected = new ResourceSchema().setFields(bagSubFieldSchemas); + + // Get the actual converted schema. + HCatSchema actualHCatSchema = new HCatSchema(Lists.newArrayList( + new HCatFieldSchema("innerLlama", HCatFieldSchema.Type.STRING, null))); + HCatFieldSchema actualHCatFieldSchema = + new HCatFieldSchema("llama", HCatFieldSchema.Type.ARRAY, actualHCatSchema, null); + ResourceSchema actual = PigHCatUtil.getBagSubSchema(actualHCatFieldSchema); + + Assert.assertEquals(expected.toString(), actual.toString()); + + // Clean up System properties that were set by this test + System.clearProperty(HCatConstants.HCAT_PIG_INNER_TUPLE_NAME); + System.clearProperty(HCatConstants.HCAT_PIG_INNER_FIELD_NAME); + } +} diff --git hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestPigStorageDriver.java.broken hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestPigStorageDriver.java.broken new file mode 100644 index 0000000..fdf3a98 --- /dev/null +++ hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestPigStorageDriver.java.broken @@ -0,0 +1,272 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hcatalog.pig; + +import java.io.BufferedInputStream; +import java.io.DataInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileWriter; +import java.io.IOException; +import java.io.PrintWriter; +import java.util.Iterator; +import java.util.Map; + +import junit.framework.TestCase; + +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.cli.CliSessionState; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; +import org.apache.hadoop.hive.metastore.MetaStoreUtils; +import org.apache.hadoop.hive.metastore.api.InvalidOperationException; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; +import org.apache.hadoop.hive.metastore.api.Partition; +import org.apache.hadoop.hive.metastore.api.UnknownTableException; +import org.apache.hadoop.hive.ql.CommandNeedRetryException; +import org.apache.hadoop.hive.ql.Driver; +import org.apache.hadoop.hive.ql.io.RCFileInputFormat; +import org.apache.hadoop.hive.ql.io.RCFileOutputFormat; +import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; +import 
org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzer; +import org.apache.hcatalog.common.HCatConstants; +import org.apache.hcatalog.pig.HCatLoader; +import org.apache.pig.ExecType; +import org.apache.pig.PigServer; +import org.apache.pig.data.Tuple; +import org.apache.pig.impl.logicalLayer.FrontendException; +import org.apache.pig.impl.util.UDFContext; +import org.apache.thrift.TException; + +public class TestPigStorageDriver extends TestCase { + + private HiveConf hcatConf; + private Driver hcatDriver; + private HiveMetaStoreClient msc; + private static String tblLocation = "/tmp/test_pig/data"; + private static String anyExistingFileInCurDir = "ivy.xml"; + private static String warehouseDir = "/tmp/hcat_junit_warehouse"; + + @Override + protected void setUp() throws Exception { + + hcatConf = new HiveConf(this.getClass()); + hcatConf.set(ConfVars.PREEXECHOOKS.varname, ""); + hcatConf.set(ConfVars.POSTEXECHOOKS.varname, ""); + hcatConf.set(ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); + hcatConf.set(ConfVars.SEMANTIC_ANALYZER_HOOK.varname, HCatSemanticAnalyzer.class.getName()); + hcatDriver = new Driver(hcatConf); + msc = new HiveMetaStoreClient(hcatConf); + SessionState.start(new CliSessionState(hcatConf)); + super.setUp(); + } + + @Override + protected void tearDown() throws Exception { + super.tearDown(); + } + + public void testPigStorageDriver() throws IOException, CommandNeedRetryException{ + + String fsLoc = hcatConf.get("fs.default.name"); + Path tblPath = new Path(fsLoc, tblLocation); + String tblName = "junit_pigstorage"; + tblPath.getFileSystem(hcatConf).copyFromLocalFile(new Path(anyExistingFileInCurDir),tblPath); + + hcatDriver.run("drop table " + tblName); + CommandProcessorResponse resp; + String createTable = "create table " + tblName + " (a string) partitioned by (b string) stored as TEXTFILE"; + + resp = hcatDriver.run(createTable); + assertEquals(0, resp.getResponseCode()); 
+ assertNull(resp.getErrorMessage()); + + resp = hcatDriver.run("alter table " + tblName + " add partition (b='2010-10-10') location '"+new Path(fsLoc, "/tmp/test_pig")+"'"); + assertEquals(0, resp.getResponseCode()); + assertNull(resp.getErrorMessage()); + + resp = hcatDriver.run("alter table " + tblName + " partition (b='2010-10-10') set fileformat TEXTFILE"); + assertEquals(0, resp.getResponseCode()); + assertNull(resp.getErrorMessage()); + + resp = hcatDriver.run("desc extended " + tblName + " partition (b='2010-10-10')"); + assertEquals(0, resp.getResponseCode()); + assertNull(resp.getErrorMessage()); + + PigServer server = new PigServer(ExecType.LOCAL, hcatConf.getAllProperties()); + UDFContext.getUDFContext().setClientSystemProps(); + server.registerQuery(" a = load '" + tblName + "' using "+HCatLoader.class.getName()+";"); + Iterator itr = server.openIterator("a"); + boolean result = compareWithFile(itr, anyExistingFileInCurDir, 2, "2010-10-10", null); + assertTrue(result); + + server.registerQuery("a = load '"+tblPath.toString()+"' using PigStorage() as (a:chararray);"); + server.store("a", tblName, HCatStorer.class.getName() + "('b=2010-10-11')"); + + server.registerQuery("a = load '" + warehouseDir + "/" + tblName + "/b=2010-10-11' using PigStorage() as (a:chararray);"); + itr = server.openIterator("a"); + result = compareWithFile(itr, anyExistingFileInCurDir, 1, "2010-10-11", null); + assertTrue(result); + + // Test multi-store + server.registerQuery("a = load '"+tblPath.toString()+"' using PigStorage() as (a:chararray);"); + server.registerQuery("store a into '" + tblName + "' using " + HCatStorer.class.getName() + "('b=2010-11-01');"); + server.registerQuery("store a into '" + tblName + "' using " + HCatStorer.class.getName() + "('b=2010-11-02');"); + + server.registerQuery("a = load '" + warehouseDir + "/" + tblName + "/b=2010-11-01' using PigStorage() as (a:chararray);"); + itr = server.openIterator("a"); + result = compareWithFile(itr, 
anyExistingFileInCurDir, 1, "2010-11-01", null); + assertTrue(result); + + server.registerQuery("a = load '" + warehouseDir + "/" + tblName + "/b=2010-11-02' using PigStorage() as (a:chararray);"); + itr = server.openIterator("a"); + result = compareWithFile(itr, anyExistingFileInCurDir, 1, "2010-11-02", null); + assertTrue(result); + + hcatDriver.run("drop table " + tblName); + } + + private boolean compareWithFile(Iterator itr, String factFile, int numColumn, String key, String valueSuffix) throws IOException { + DataInputStream stream = new DataInputStream(new BufferedInputStream(new FileInputStream(new File(factFile)))); + while(itr.hasNext()){ + Tuple t = itr.next(); + assertEquals(numColumn, t.size()); + if(t.get(0) != null) { + // If underlying data-field is empty. PigStorage inserts null instead + // of empty String objects. + assertTrue(t.get(0) instanceof String); + String expected = stream.readLine(); + if (valueSuffix!=null) + expected += valueSuffix; + assertEquals(expected, t.get(0)); + } + else{ + assertTrue(stream.readLine().isEmpty()); + } + + if (numColumn>1) { + // The second column must be key + assertTrue(t.get(1) instanceof String); + assertEquals(key, t.get(1)); + } + } + assertEquals(0,stream.available()); + stream.close(); + return true; + } + + public void testDelim() throws MetaException, TException, UnknownTableException, NoSuchObjectException, InvalidOperationException, IOException, CommandNeedRetryException{ + + hcatDriver.run("drop table junit_pigstorage_delim"); + + CommandProcessorResponse resp; + String createTable = "create table junit_pigstorage_delim (a0 string, a1 string) partitioned by (b string) stored as RCFILE"; + + resp = hcatDriver.run(createTable); + + assertEquals(0, resp.getResponseCode()); + assertNull(resp.getErrorMessage()); + + resp = hcatDriver.run("alter table junit_pigstorage_delim add partition (b='2010-10-10')"); + assertEquals(0, resp.getResponseCode()); + assertNull(resp.getErrorMessage()); + + resp = 
hcatDriver.run("alter table junit_pigstorage_delim partition (b='2010-10-10') set fileformat TEXTFILE"); + + Partition part = msc.getPartition(MetaStoreUtils.DEFAULT_DATABASE_NAME, "junit_pigstorage_delim", "b=2010-10-10"); + Map partParms = part.getParameters(); + partParms.put(HCatConstants.HCAT_PIG_LOADER_ARGS, "control-A"); + partParms.put(HCatConstants.HCAT_PIG_STORER_ARGS, "control-A"); + + msc.alter_partition(MetaStoreUtils.DEFAULT_DATABASE_NAME, "junit_pigstorage_delim", part); + + PigServer server = new PigServer(ExecType.LOCAL, hcatConf.getAllProperties()); + UDFContext.getUDFContext().setClientSystemProps(); + server.registerQuery(" a = load 'junit_pigstorage_delim' using "+HCatLoader.class.getName()+";"); + try{ + server.openIterator("a"); + }catch(FrontendException fe){} + + resp = hcatDriver.run("alter table junit_pigstorage_delim set fileformat TEXTFILE"); + assertEquals(0, resp.getResponseCode()); + assertNull(resp.getErrorMessage()); + resp = hcatDriver.run("alter table junit_pigstorage_delim set TBLPROPERTIES ('hcat.pig.loader.args'=':', 'hcat.pig.storer.args'=':')"); + assertEquals(0, resp.getResponseCode()); + assertNull(resp.getErrorMessage()); + + File inputFile = File.createTempFile("hcat_test", ""); + PrintWriter p = new PrintWriter(new FileWriter(inputFile)); + p.println("1\t2"); + p.println("3\t4"); + p.close(); + server.registerQuery("a = load '"+inputFile.toString()+"' using PigStorage() as (a0:chararray, a1:chararray);"); + server.store("a", "junit_pigstorage_delim", HCatStorer.class.getName() + "('b=2010-10-11')"); + + server.registerQuery("a = load '/tmp/hcat_junit_warehouse/junit_pigstorage_delim/b=2010-10-11' using PigStorage() as (a:chararray);"); + Iterator itr = server.openIterator("a"); + + assertTrue(itr.hasNext()); + Tuple t = itr.next(); + assertTrue(t.get(0).equals("1:2")); + + assertTrue(itr.hasNext()); + t = itr.next(); + assertTrue(t.get(0).equals("3:4")); + + assertFalse(itr.hasNext()); + inputFile.delete(); + } + + 
public void testMultiConstructArgs() throws MetaException, TException, UnknownTableException, NoSuchObjectException, InvalidOperationException, IOException, CommandNeedRetryException{ + + String fsLoc = hcatConf.get("fs.default.name"); + Path tblPath = new Path(fsLoc, tblLocation); + String tblName = "junit_pigstorage_constructs"; + tblPath.getFileSystem(hcatConf).copyFromLocalFile(new Path(anyExistingFileInCurDir),tblPath); + + hcatDriver.run("drop table junit_pigstorage_constructs"); + + CommandProcessorResponse resp; + String createTable = "create table " + tblName + " (a string) partitioned by (b string) stored as TEXTFILE"; + + resp = hcatDriver.run(createTable); + + assertEquals(0, resp.getResponseCode()); + assertNull(resp.getErrorMessage()); + + resp = hcatDriver.run("alter table " + tblName + " set TBLPROPERTIES ('hcat.pig.storer'='org.apache.hcatalog.pig.MyPigStorage', 'hcat.pig.storer.args'=':#hello', 'hcat.pig.args.delimiter'='#')"); + assertEquals(0, resp.getResponseCode()); + assertNull(resp.getErrorMessage()); + + PigServer server = new PigServer(ExecType.LOCAL, hcatConf.getAllProperties()); + UDFContext.getUDFContext().setClientSystemProps(); + + server.registerQuery("a = load '"+tblPath.toString()+"' using PigStorage() as (a:chararray);"); + server.store("a", tblName, HCatStorer.class.getName() + "('b=2010-10-11')"); + + server.registerQuery("a = load '" + warehouseDir + "/" + tblName + "/b=2010-10-11' using PigStorage() as (a:chararray);"); + Iterator itr = server.openIterator("a"); + boolean result = compareWithFile(itr, anyExistingFileInCurDir, 1, "2010-10-11", ":hello"); + assertTrue(result); + } +} diff --git hcatalog/pom.xml hcatalog/pom.xml index 81f1e71..9f48a41 100644 --- hcatalog/pom.xml +++ hcatalog/pom.xml @@ -43,7 +43,7 @@ 4.0.0 - org.apache.hcatalog + org.apache.hive.hcatalog hcatalog 0.12.0-SNAPSHOT pom diff --git hcatalog/server-extensions/pom.xml hcatalog/server-extensions/pom.xml index ee6d711..7961400 100644 --- 
hcatalog/server-extensions/pom.xml +++ hcatalog/server-extensions/pom.xml @@ -22,14 +22,13 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"> - org.apache.hcatalog + org.apache.hive.hcatalog hcatalog 0.12.0-SNAPSHOT ../pom.xml 4.0.0 - org.apache.hcatalog hcatalog-server-extensions jar server-extensions @@ -61,7 +60,7 @@ compile - org.apache.hcatalog + org.apache.hive.hcatalog hcatalog-core ${hcatalog.version} compile diff --git hcatalog/server-extensions/src/main/java/org/apache/hcatalog/listener/NotificationListener.java hcatalog/server-extensions/src/main/java/org/apache/hcatalog/listener/NotificationListener.java deleted file mode 100644 index 12b57e4..0000000 --- hcatalog/server-extensions/src/main/java/org/apache/hcatalog/listener/NotificationListener.java +++ /dev/null @@ -1,454 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.listener; - -import java.util.ArrayList; -import java.util.HashMap; - -import javax.jms.Connection; -import javax.jms.ConnectionFactory; -import javax.jms.DeliveryMode; -import javax.jms.Destination; -import javax.jms.ExceptionListener; -import javax.jms.JMSException; -import javax.jms.Message; -import javax.jms.MessageProducer; -import javax.jms.Session; -import javax.jms.Topic; -import javax.naming.Context; -import javax.naming.InitialContext; -import javax.naming.NamingException; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.HiveMetaStore.HMSHandler; -import org.apache.hadoop.hive.metastore.MetaStoreEventListener; -import org.apache.hadoop.hive.metastore.api.InvalidOperationException; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; -import org.apache.hadoop.hive.metastore.api.Order; -import org.apache.hadoop.hive.metastore.api.Partition; -import org.apache.hadoop.hive.metastore.api.StorageDescriptor; -import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.hadoop.hive.metastore.events.AddPartitionEvent; -import org.apache.hadoop.hive.metastore.events.AlterPartitionEvent; -import org.apache.hadoop.hive.metastore.events.AlterTableEvent; -import org.apache.hadoop.hive.metastore.events.CreateDatabaseEvent; -import org.apache.hadoop.hive.metastore.events.CreateTableEvent; -import org.apache.hadoop.hive.metastore.events.DropDatabaseEvent; -import org.apache.hadoop.hive.metastore.events.DropPartitionEvent; -import org.apache.hadoop.hive.metastore.events.DropTableEvent; -import org.apache.hadoop.hive.metastore.events.ListenerEvent; -import org.apache.hadoop.hive.metastore.events.LoadPartitionDoneEvent; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.messaging.HCatEventMessage; -import 
org.apache.hcatalog.messaging.MessageFactory; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Implementation of - * {@link org.apache.hadoop.hive.metastore.MetaStoreEventListener} It sends - * message on two type of topics. One has name of form dbName.tblName On this - * topic, two kind of messages are sent: add/drop partition and - * finalize_partition message. Second topic has name "HCAT" and messages sent on - * it are: add/drop database and add/drop table. All messages also has a - * property named "HCAT_EVENT" set on them whose value can be used to configure - * message selector on subscriber side. - */ -public class NotificationListener extends MetaStoreEventListener { - - private static final Logger LOG = LoggerFactory.getLogger(NotificationListener.class); - protected Connection conn; - private static MessageFactory messageFactory = MessageFactory.getInstance(); - public static final int NUM_RETRIES = 1; - private static final String HEALTH_CHECK_TOPIC_SUFFIX = "jms_health_check"; - private static final String HEALTH_CHECK_MSG = "HCAT_JMS_HEALTH_CHECK_MESSAGE"; - - protected final ThreadLocal session = new ThreadLocal() { - @Override - protected Session initialValue() { - try { - return createSession(); - } catch (Exception e) { - LOG.error("Couldn't create JMS Session", e); - return null; - } - } - - @Override - public void remove() { - if (get() != null) { - try { - get().close(); - } catch (Exception e) { - LOG.error("Unable to close bad JMS session, ignored error", e); - } - } - super.remove(); - } - }; - - /** - * Create message bus connection and session in constructor. 
- */ - public NotificationListener(final Configuration conf) { - super(conf); - testAndCreateConnection(); - } - - private static String getTopicName(Partition partition, - ListenerEvent partitionEvent) throws MetaException { - try { - return partitionEvent.getHandler() - .get_table(partition.getDbName(), partition.getTableName()) - .getParameters().get(HCatConstants.HCAT_MSGBUS_TOPIC_NAME); - } catch (NoSuchObjectException e) { - throw new MetaException(e.toString()); - } - } - - @Override - public void onAddPartition(AddPartitionEvent partitionEvent) - throws MetaException { - // Subscriber can get notification of newly add partition in a - // particular table by listening on a topic named "dbName.tableName" - // and message selector string as "HCAT_EVENT = HCAT_ADD_PARTITION" - if (partitionEvent.getStatus()) { - - Partition partition = partitionEvent.getPartition(); - String topicName = getTopicName(partition, partitionEvent); - if (topicName != null && !topicName.equals("")) { - send(messageFactory.buildAddPartitionMessage(partitionEvent.getTable(), partition), topicName); - } else { - LOG.info("Topic name not found in metastore. Suppressing HCatalog notification for " - + partition.getDbName() - + "." - + partition.getTableName() - + " To enable notifications for this table, please do alter table set properties (" - + HCatConstants.HCAT_MSGBUS_TOPIC_NAME - + "=.) or whatever you want topic name to be."); - } - } - - } - - /** - * Send dropped partition notifications. Subscribers can receive these notifications for a - * particular table by listening on a topic named "dbName.tableName" with message selector - * string {@value org.apache.hcatalog.common.HCatConstants#HCAT_EVENT} = - * {@value org.apache.hcatalog.common.HCatConstants#HCAT_DROP_PARTITION_EVENT}. - *
- * TODO: DataNucleus 2.0.3, currently used by the HiveMetaStore for persistence, has been - * found to throw NPE when serializing objects that contain null. For this reason we override - * some fields in the StorageDescriptor of this notification. This should be fixed after - * HIVE-2084 "Upgrade datanucleus from 2.0.3 to 3.0.1" is resolved. - */ - @Override - public void onDropPartition(DropPartitionEvent partitionEvent) throws MetaException { - if (partitionEvent.getStatus()) { - Partition partition = partitionEvent.getPartition(); - StorageDescriptor sd = partition.getSd(); - sd.setBucketCols(new ArrayList()); - sd.setSortCols(new ArrayList()); - sd.setParameters(new HashMap()); - sd.getSerdeInfo().setParameters(new HashMap()); - sd.getSkewedInfo().setSkewedColNames(new ArrayList()); - String topicName = getTopicName(partition, partitionEvent); - if (topicName != null && !topicName.equals("")) { - send(messageFactory.buildDropPartitionMessage(partitionEvent.getTable(), partition), topicName); - } else { - LOG.info("Topic name not found in metastore. Suppressing HCatalog notification for " - + partition.getDbName() - + "." - + partition.getTableName() - + " To enable notifications for this table, please do alter table set properties (" - + HCatConstants.HCAT_MSGBUS_TOPIC_NAME - + "=.) 
or whatever you want topic name to be."); - } - } - } - - @Override - public void onCreateDatabase(CreateDatabaseEvent dbEvent) - throws MetaException { - // Subscriber can get notification about addition of a database in HCAT - // by listening on a topic named "HCAT" and message selector string - // as "HCAT_EVENT = HCAT_ADD_DATABASE" - if (dbEvent.getStatus()) { - String topicName = getTopicPrefix(dbEvent.getHandler().getHiveConf()); - send(messageFactory.buildCreateDatabaseMessage(dbEvent.getDatabase()), topicName); - } - } - - @Override - public void onDropDatabase(DropDatabaseEvent dbEvent) throws MetaException { - // Subscriber can get notification about drop of a database in HCAT - // by listening on a topic named "HCAT" and message selector string - // as "HCAT_EVENT = HCAT_DROP_DATABASE" - if (dbEvent.getStatus()) { - String topicName = getTopicPrefix(dbEvent.getHandler().getHiveConf()); - send(messageFactory.buildDropDatabaseMessage(dbEvent.getDatabase()), topicName); - } - } - - @Override - public void onCreateTable(CreateTableEvent tableEvent) throws MetaException { - // Subscriber can get notification about addition of a table in HCAT - // by listening on a topic named "HCAT" and message selector string - // as "HCAT_EVENT = HCAT_ADD_TABLE" - if (tableEvent.getStatus()) { - Table tbl = tableEvent.getTable(); - HMSHandler handler = tableEvent.getHandler(); - HiveConf conf = handler.getHiveConf(); - Table newTbl; - try { - newTbl = handler.get_table(tbl.getDbName(), tbl.getTableName()) - .deepCopy(); - newTbl.getParameters().put( - HCatConstants.HCAT_MSGBUS_TOPIC_NAME, - getTopicPrefix(conf) + "." + newTbl.getDbName().toLowerCase() + "." 
- + newTbl.getTableName().toLowerCase()); - handler.alter_table(newTbl.getDbName(), newTbl.getTableName(), newTbl); - } catch (InvalidOperationException e) { - MetaException me = new MetaException(e.toString()); - me.initCause(e); - throw me; - } catch (NoSuchObjectException e) { - MetaException me = new MetaException(e.toString()); - me.initCause(e); - throw me; - } - String topicName = getTopicPrefix(conf) + "." + newTbl.getDbName().toLowerCase(); - send(messageFactory.buildCreateTableMessage(newTbl), topicName); - } - } - - private String getTopicPrefix(Configuration conf) { - return conf.get(HCatConstants.HCAT_MSGBUS_TOPIC_PREFIX, - HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX); - } - - /** - * Send dropped table notifications. Subscribers can receive these notifications for - * dropped tables by listening on topic "HCAT" with message selector string - * {@value org.apache.hcatalog.common.HCatConstants#HCAT_EVENT} = - * {@value org.apache.hcatalog.common.HCatConstants#HCAT_DROP_TABLE_EVENT} - *
- * TODO: DataNucleus 2.0.3, currently used by the HiveMetaStore for persistence, has been - * found to throw NPE when serializing objects that contain null. For this reason we override - * some fields in the StorageDescriptor of this notification. This should be fixed after - * HIVE-2084 "Upgrade datanucleus from 2.0.3 to 3.0.1" is resolved. - */ - @Override - public void onDropTable(DropTableEvent tableEvent) throws MetaException { - // Subscriber can get notification about drop of a table in HCAT - // by listening on a topic named "HCAT" and message selector string - // as "HCAT_EVENT = HCAT_DROP_TABLE" - - // Datanucleus throws NPE when we try to serialize a table object - // retrieved from metastore. To workaround that we reset following objects - - if (tableEvent.getStatus()) { - Table table = tableEvent.getTable(); - String topicName = getTopicPrefix(tableEvent.getHandler().getHiveConf()) + "." + table.getDbName().toLowerCase(); - send(messageFactory.buildDropTableMessage(table), topicName); - } - } - - /** - * @param hCatEventMessage The HCatEventMessage being sent over JMS. - * @param topicName is the name on message broker on which message is sent. - */ - protected void send(HCatEventMessage hCatEventMessage, String topicName) { - send(hCatEventMessage, topicName, NUM_RETRIES); - } - - /** - * @param hCatEventMessage The HCatEventMessage being sent over JMS, this method is threadsafe - * @param topicName is the name on message broker on which message is sent. 
- * @param retries the number of retry attempts - */ - protected void send(HCatEventMessage hCatEventMessage, String topicName, int retries) { - try { - if (session.get() == null) { - // Need to reconnect - throw new JMSException("Invalid JMS session"); - } - Destination topic = createTopic(topicName); - Message msg = session.get().createTextMessage(hCatEventMessage.toString()); - - msg.setStringProperty(HCatConstants.HCAT_EVENT, hCatEventMessage.getEventType().toString()); - msg.setStringProperty(HCatConstants.HCAT_MESSAGE_VERSION, messageFactory.getVersion()); - msg.setStringProperty(HCatConstants.HCAT_MESSAGE_FORMAT, messageFactory.getMessageFormat()); - MessageProducer producer = createProducer(topic); - producer.send(msg); - // Message must be transacted before we return. - session.get().commit(); - } catch (Exception e) { - if (retries >= 0) { - // this may happen if we were able to establish connection once, but its no longer valid - LOG.error("Seems like connection is lost. Will retry. Retries left : " + retries + ". error was:", e); - testAndCreateConnection(); - send(hCatEventMessage, topicName, retries - 1); - } else { - // Gobble up the exception. Message delivery is best effort. - LOG.error("Failed to send message on topic: " + topicName + - " event: " + hCatEventMessage.getEventType() + " after retries: " + NUM_RETRIES, e); - } - } - } - - /** - * Get the topic object for the topicName - * - * @param topicName The String identifying the message-topic. - * @return A {@link Topic} object corresponding to the specified topicName. - * @throws JMSException - */ - protected Topic createTopic(final String topicName) throws JMSException { - return session.get().createTopic(topicName); - } - - /** - * Does a health check on the connection by sending a dummy message. 
- * Create the connection if the connection is found to be bad - * Also recreates the session - */ - protected synchronized void testAndCreateConnection() { - if (conn != null) { - // This method is reached when error occurs while sending msg, so the session must be bad - session.remove(); - if (!isConnectionHealthy()) { - // I am the first thread to detect the error, cleanup old connection & reconnect - try { - conn.close(); - } catch (Exception e) { - LOG.error("Unable to close bad JMS connection, ignored error", e); - } - conn = createConnection(); - } - } else { - conn = createConnection(); - } - try { - session.set(createSession()); - } catch (JMSException e) { - LOG.error("Couldn't create JMS session, ignored the error", e); - } - } - - /** - * Create the JMS connection - * @return newly created JMS connection - */ - protected Connection createConnection() { - LOG.info("Will create new JMS connection"); - Context jndiCntxt; - Connection jmsConnection = null; - try { - jndiCntxt = new InitialContext(); - ConnectionFactory connFac = (ConnectionFactory) jndiCntxt.lookup("ConnectionFactory"); - jmsConnection = connFac.createConnection(); - jmsConnection.start(); - jmsConnection.setExceptionListener(new ExceptionListener() { - @Override - public void onException(JMSException jmse) { - LOG.error("JMS Exception listener received exception. Ignored the error", jmse); - } - }); - } catch (NamingException e) { - LOG.error("JNDI error while setting up Message Bus connection. 
" - + "Please make sure file named 'jndi.properties' is in " - + "classpath and contains appropriate key-value pairs.", e); - } catch (JMSException e) { - LOG.error("Failed to initialize connection to message bus", e); - } catch (Throwable t) { - LOG.error("Unable to connect to JMS provider", t); - } - return jmsConnection; - } - - /** - * Send a dummy message to probe if the JMS connection is healthy - * @return true if connection is healthy, false otherwise - */ - protected boolean isConnectionHealthy() { - try { - Topic topic = createTopic(getTopicPrefix(getConf()) + "." + HEALTH_CHECK_TOPIC_SUFFIX); - MessageProducer producer = createProducer(topic); - Message msg = session.get().createTextMessage(HEALTH_CHECK_MSG); - producer.send(msg, DeliveryMode.NON_PERSISTENT, 4, 0); - } catch (Exception e) { - return false; - } - return true; - } - - /** - * Creates a JMS session - * @return newly create JMS session - * @throws JMSException - */ - protected Session createSession() throws JMSException { - // We want message to be sent when session commits, thus we run in - // transacted mode. - return conn.createSession(true, Session.SESSION_TRANSACTED); - } - - /** - * Create a JMS producer - * @param topic - * @return newly created message producer - * @throws JMSException - */ - protected MessageProducer createProducer(Destination topic) throws JMSException { - return session.get().createProducer(topic); - } - - @Override - protected void finalize() throws Throwable { - if (conn != null) { - try { - conn.close(); - } catch (Exception e) { - LOG.error("Couldn't close jms connection, ignored the error", e); - } - } - } - - @Override - public void onLoadPartitionDone(LoadPartitionDoneEvent lpde) - throws MetaException { -// TODO: Fix LoadPartitionDoneEvent. Currently, LPDE can only carry a single partition-spec. And that defeats the purpose. 
-// if(lpde.getStatus()) -// send(lpde.getPartitionName(),lpde.getTable().getParameters().get(HCatConstants.HCAT_MSGBUS_TOPIC_NAME),HCatConstants.HCAT_PARTITION_DONE_EVENT); - } - - @Override - public void onAlterPartition(AlterPartitionEvent ape) throws MetaException { - // no-op - } - - @Override - public void onAlterTable(AlterTableEvent ate) throws MetaException { - // no-op - } -} diff --git hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/AddPartitionMessage.java hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/AddPartitionMessage.java deleted file mode 100644 index 493e8b2..0000000 --- hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/AddPartitionMessage.java +++ /dev/null @@ -1,54 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.messaging; - -import java.util.List; -import java.util.Map; - -/** - * The HCat message sent when partition(s) are added to a table. - */ -public abstract class AddPartitionMessage extends HCatEventMessage { - - protected AddPartitionMessage() { - super(EventType.ADD_PARTITION); - } - - /** - * Getter for name of table (where partitions are added). 
- * @return Table-name (String). - */ - public abstract String getTable(); - - /** - * Getter for list of partitions added. - * @return List of maps, where each map identifies values for each partition-key, for every added partition. - */ - public abstract List> getPartitions (); - - @Override - public HCatEventMessage checkValid() { - if (getTable() == null) - throw new IllegalStateException("Table name unset."); - if (getPartitions() == null) - throw new IllegalStateException("Partition-list unset."); - return super.checkValid(); - } -} diff --git hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/CreateDatabaseMessage.java hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/CreateDatabaseMessage.java deleted file mode 100644 index 1d663f9..0000000 --- hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/CreateDatabaseMessage.java +++ /dev/null @@ -1,31 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.messaging; - -/** - * HCat message sent when a Database is created in HCatalog. 
- */ -public abstract class CreateDatabaseMessage extends HCatEventMessage { - - protected CreateDatabaseMessage() { - super(EventType.CREATE_DATABASE); - } - -} diff --git hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/CreateTableMessage.java hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/CreateTableMessage.java deleted file mode 100644 index b92b373..0000000 --- hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/CreateTableMessage.java +++ /dev/null @@ -1,43 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.messaging; - -/** - * HCat message sent when a table is created in HCatalog. - */ -public abstract class CreateTableMessage extends HCatEventMessage { - - protected CreateTableMessage() { - super(EventType.CREATE_TABLE); - } - - /** - * Getter for the name of table created in HCatalog. - * @return Table-name (String). 
- */ - public abstract String getTable(); - - @Override - public HCatEventMessage checkValid() { - if (getTable() == null) - throw new IllegalStateException("Table name unset."); - return super.checkValid(); - } -} diff --git hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/DropDatabaseMessage.java hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/DropDatabaseMessage.java deleted file mode 100644 index f5f4627..0000000 --- hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/DropDatabaseMessage.java +++ /dev/null @@ -1,30 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.messaging; - -/** - * HCat message sent when a Database is dropped from HCatalog. 
- */ -public abstract class DropDatabaseMessage extends HCatEventMessage { - - protected DropDatabaseMessage() { - super(EventType.DROP_DATABASE); - } -} diff --git hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/DropPartitionMessage.java hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/DropPartitionMessage.java deleted file mode 100644 index 9093ecb..0000000 --- hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/DropPartitionMessage.java +++ /dev/null @@ -1,45 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.messaging; - -import java.util.List; -import java.util.Map; - -/** - * HCat message sent when a partition is dropped in HCatalog. 
- */ -public abstract class DropPartitionMessage extends HCatEventMessage { - - protected DropPartitionMessage() { - super(EventType.DROP_PARTITION); - } - - public abstract String getTable(); - public abstract List> getPartitions (); - - @Override - public HCatEventMessage checkValid() { - if (getTable() == null) - throw new IllegalStateException("Table name unset."); - if (getPartitions() == null) - throw new IllegalStateException("Partition-list unset."); - return super.checkValid(); - } -} diff --git hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/DropTableMessage.java hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/DropTableMessage.java deleted file mode 100644 index d984d30..0000000 --- hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/DropTableMessage.java +++ /dev/null @@ -1,43 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.messaging; - -/** - * HCat message sent when a Table is dropped in HCatalog. 
- */ -public abstract class DropTableMessage extends HCatEventMessage { - - protected DropTableMessage() { - super(EventType.DROP_TABLE); - } - - /** - * Getter for the name of the table being dropped. - * @return Table-name (String). - */ - public abstract String getTable(); - - @Override - public HCatEventMessage checkValid() { - if (getTable() == null) - throw new IllegalStateException("Table name unset."); - return super.checkValid(); - } -} diff --git hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/HCatEventMessage.java hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/HCatEventMessage.java deleted file mode 100644 index b9926d4..0000000 --- hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/HCatEventMessage.java +++ /dev/null @@ -1,100 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.messaging; - -import org.apache.hcatalog.common.HCatConstants; - -/** - * Class representing messages emitted when Metastore operations are done. - * (E.g. Creation and deletion of databases, tables and partitions.) 
- */ -public abstract class HCatEventMessage { - - /** - * Enumeration of all supported types of Metastore operations. - */ - public static enum EventType { - - CREATE_DATABASE(HCatConstants.HCAT_CREATE_DATABASE_EVENT), - DROP_DATABASE(HCatConstants.HCAT_DROP_DATABASE_EVENT), - CREATE_TABLE(HCatConstants.HCAT_CREATE_TABLE_EVENT), - DROP_TABLE(HCatConstants.HCAT_DROP_TABLE_EVENT), - ADD_PARTITION(HCatConstants.HCAT_ADD_PARTITION_EVENT), - DROP_PARTITION(HCatConstants.HCAT_DROP_PARTITION_EVENT); - - private String typeString; - - EventType(String typeString) { - this.typeString = typeString; - } - - @Override - public String toString() { return typeString; } - } - - protected EventType eventType; - - protected HCatEventMessage(EventType eventType) { - this.eventType = eventType; - } - - public EventType getEventType() { - return eventType; - } - - /** - * Getter for HCatalog Server's URL. - * (This is where the event originates from.) - * @return HCatalog Server's URL (String). - */ - public abstract String getServer(); - - /** - * Getter for the Kerberos principal of the HCatalog service. - * @return HCatalog Service Principal (String). - */ - public abstract String getServicePrincipal(); - - /** - * Getter for the name of the Database on which the Metastore operation is done. - * @return Database-name (String). - */ - public abstract String getDB(); - - /** - * Getter for the timestamp associated with the operation. - * @return Timestamp (Long - seconds since epoch). - */ - public abstract Long getTimestamp(); - - /** - * Class invariant. Checked after construction or deserialization. 
- */ - public HCatEventMessage checkValid() { - if (getServer() == null || getServicePrincipal() == null) - throw new IllegalStateException("Server-URL/Service-Principal shouldn't be null."); - if (getEventType() == null) - throw new IllegalStateException("Event-type unset."); - if (getDB() == null) - throw new IllegalArgumentException("DB-name unset."); - - return this; - } -} diff --git hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/MessageDeserializer.java hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/MessageDeserializer.java deleted file mode 100644 index 38a2622..0000000 --- hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/MessageDeserializer.java +++ /dev/null @@ -1,83 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.messaging; - -/** - * Interface for converting HCat events from String-form back to HCatEventMessage instances. - */ -public abstract class MessageDeserializer { - - /** - * Method to construct HCatEventMessage from string. 
- */ - public HCatEventMessage getHCatEventMessage(String eventTypeString, String messageBody) { - - switch (HCatEventMessage.EventType.valueOf(eventTypeString)) { - case CREATE_DATABASE: - return getCreateDatabaseMessage(messageBody); - case DROP_DATABASE: - return getDropDatabaseMessage(messageBody); - case CREATE_TABLE: - return getCreateTableMessage(messageBody); - case DROP_TABLE: - return getDropTableMessage(messageBody); - case ADD_PARTITION: - return getAddPartitionMessage(messageBody); - case DROP_PARTITION: - return getDropPartitionMessage(messageBody); - - default: - throw new IllegalArgumentException("Unsupported event-type: " + eventTypeString); - } - } - - /** - * Method to de-serialize CreateDatabaseMessage instance. - */ - public abstract CreateDatabaseMessage getCreateDatabaseMessage(String messageBody); - - /** - * Method to de-serialize DropDatabaseMessage instance. - */ - public abstract DropDatabaseMessage getDropDatabaseMessage(String messageBody); - - /** - * Method to de-serialize CreateTableMessage instance. - */ - public abstract CreateTableMessage getCreateTableMessage(String messageBody); - - /** - * Method to de-serialize DropTableMessage instance. - */ - public abstract DropTableMessage getDropTableMessage(String messageBody); - - /** - * Method to de-serialize AddPartitionMessage instance. - */ - public abstract AddPartitionMessage getAddPartitionMessage(String messageBody); - - /** - * Method to de-serialize DropPartitionMessage instance. - */ - public abstract DropPartitionMessage getDropPartitionMessage(String messageBody); - - // Protection against construction. 
- protected MessageDeserializer() {} -} diff --git hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/MessageFactory.java hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/MessageFactory.java deleted file mode 100644 index b88ffcb..0000000 --- hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/MessageFactory.java +++ /dev/null @@ -1,138 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.messaging; - -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.api.Database; -import org.apache.hadoop.hive.metastore.api.Partition; -import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.hadoop.util.ReflectionUtils; -import org.apache.hcatalog.messaging.json.JSONMessageFactory; - -/** - * Abstract Factory for the construction of HCatalog message instances. 
- */ -public abstract class MessageFactory { - - private static MessageFactory instance = new JSONMessageFactory(); - - protected static final HiveConf hiveConf = new HiveConf(); - static { - hiveConf.addResource("hive-site.xml"); - } - - private static final String CONF_LABEL_HCAT_MESSAGE_FACTORY_IMPL_PREFIX = "hcatalog.message.factory.impl."; - private static final String CONF_LABEL_HCAT_MESSAGE_FORMAT = "hcatalog.message.format"; - private static final String HCAT_MESSAGE_FORMAT = hiveConf.get(CONF_LABEL_HCAT_MESSAGE_FORMAT, "json"); - private static final String DEFAULT_MESSAGE_FACTORY_IMPL = "org.apache.hcatalog.messaging.json.JSONMessageFactory"; - private static final String HCAT_MESSAGE_FACTORY_IMPL = hiveConf.get(CONF_LABEL_HCAT_MESSAGE_FACTORY_IMPL_PREFIX - + HCAT_MESSAGE_FORMAT, - DEFAULT_MESSAGE_FACTORY_IMPL); - - protected static final String HCAT_SERVER_URL = hiveConf.get(HiveConf.ConfVars.METASTOREURIS.name(), ""); - protected static final String HCAT_SERVICE_PRINCIPAL = hiveConf.get(HiveConf.ConfVars.METASTORE_KERBEROS_PRINCIPAL.name(), ""); - - /** - * Getter for MessageFactory instance. - */ - public static MessageFactory getInstance() { - if (instance == null) { - instance = getInstance(HCAT_MESSAGE_FACTORY_IMPL); - } - return instance; - } - - private static MessageFactory getInstance(String className) { - try { - return (MessageFactory)ReflectionUtils.newInstance(Class.forName(className), hiveConf); - } - catch (ClassNotFoundException classNotFound) { - throw new IllegalStateException("Could not construct MessageFactory implementation: ", classNotFound); - } - } - - /** - * Getter for MessageDeserializer, corresponding to the specified format and version. - * @param format Serialization format for notifications. - * @param version Version of serialization format (currently ignored.) - * @return MessageDeserializer. 
- */ - public static MessageDeserializer getDeserializer(String format, - String version) { - return getInstance(hiveConf.get(CONF_LABEL_HCAT_MESSAGE_FACTORY_IMPL_PREFIX + format, - DEFAULT_MESSAGE_FACTORY_IMPL)).getDeserializer(); - } - - public abstract MessageDeserializer getDeserializer(); - - /** - * Getter for version-string, corresponding to all constructed messages. - */ - public abstract String getVersion(); - - /** - * Getter for message-format. - */ - public abstract String getMessageFormat(); - - /** - * Factory method for CreateDatabaseMessage. - * @param db The Database being added. - * @return CreateDatabaseMessage instance. - */ - public abstract CreateDatabaseMessage buildCreateDatabaseMessage(Database db); - - /** - * Factory method for DropDatabaseMessage. - * @param db The Database being dropped. - * @return DropDatabaseMessage instance. - */ - public abstract DropDatabaseMessage buildDropDatabaseMessage(Database db); - - /** - * Factory method for CreateTableMessage. - * @param table The Table being created. - * @return CreateTableMessage instance. - */ - public abstract CreateTableMessage buildCreateTableMessage(Table table); - - /** - * Factory method for DropTableMessage. - * @param table The Table being dropped. - * @return DropTableMessage instance. - */ - public abstract DropTableMessage buildDropTableMessage(Table table); - - /** - * Factory method for AddPartitionMessage. - * @param table The Table to which the partition is added. - * @param partition The Partition being added. - * @return AddPartitionMessage instance. - */ - public abstract AddPartitionMessage buildAddPartitionMessage(Table table, Partition partition); - - /** - * Factory method for DropPartitionMessage. - * @param table The Table from which the partition is dropped. - * @param partition The Partition being dropped. - * @return DropPartitionMessage instance. 
- */ - public abstract DropPartitionMessage buildDropPartitionMessage(Table table, Partition partition); -} diff --git hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/jms/MessagingUtils.java hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/jms/MessagingUtils.java deleted file mode 100644 index c627aca..0000000 --- hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/jms/MessagingUtils.java +++ /dev/null @@ -1,62 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.messaging.jms; - -import org.apache.commons.lang.StringUtils; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.messaging.HCatEventMessage; -import org.apache.hcatalog.messaging.MessageFactory; - -import javax.jms.JMSException; -import javax.jms.Message; -import javax.jms.TextMessage; - -/** - * Helper Utility to assist consumers of HCat Messages in extracting - * message-content from JMS messages. - */ -public class MessagingUtils { - - /** - * Method to return HCatEventMessage contained in the JMS message. 
- * @param message The JMS Message instance - * @return The contained HCatEventMessage - */ - public static HCatEventMessage getMessage(Message message) { - try { - String messageBody = ((TextMessage)message).getText(); - String eventType = message.getStringProperty(HCatConstants.HCAT_EVENT); - String messageVersion = message.getStringProperty(HCatConstants.HCAT_MESSAGE_VERSION); - String messageFormat = message.getStringProperty(HCatConstants.HCAT_MESSAGE_FORMAT); - - if (StringUtils.isEmpty(messageBody) || StringUtils.isEmpty(eventType)) - throw new IllegalArgumentException("Could not extract HCatEventMessage. " + - "EventType and/or MessageBody is null/empty."); - - return MessageFactory.getDeserializer(messageFormat, messageVersion).getHCatEventMessage(eventType, messageBody); - } - catch (JMSException exception) { - throw new IllegalArgumentException("Could not extract HCatEventMessage. ", exception); - } - } - - // Prevent construction. - private MessagingUtils() {} -} diff --git hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/json/JSONAddPartitionMessage.java hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/json/JSONAddPartitionMessage.java deleted file mode 100644 index 77d6e18..0000000 --- hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/json/JSONAddPartitionMessage.java +++ /dev/null @@ -1,85 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.messaging.json; - -import org.apache.hcatalog.messaging.AddPartitionMessage; -import org.codehaus.jackson.annotate.JsonProperty; - -import java.util.List; -import java.util.Map; - -/** - * JSON implementation of AddPartitionMessage. - */ -public class JSONAddPartitionMessage extends AddPartitionMessage { - - @JsonProperty - String server, servicePrincipal, db, table; - - @JsonProperty - Long timestamp; - - @JsonProperty - List> partitions; - - /** - * Default Constructor. Required for Jackson. - */ - public JSONAddPartitionMessage() {} - - public JSONAddPartitionMessage(String server, String servicePrincipal, String db, String table, - List> partitions, Long timestamp) { - this.server = server; - this.servicePrincipal = servicePrincipal; - this.db = db; - this.table = table; - this.partitions = partitions; - this.timestamp = timestamp; - checkValid(); - } - - @Override - public String getServer() { return server; } - - @Override - public String getServicePrincipal() { return servicePrincipal; } - - @Override - public String getDB() { return db; } - - @Override - public String getTable() { return table; } - - @Override - public Long getTimestamp() { return timestamp; } - - @Override - public List> getPartitions () { return partitions; } - - @Override - public String toString() { - try { - return JSONMessageDeserializer.mapper.writeValueAsString(this); - } - catch (Exception exception) { - throw new IllegalArgumentException("Could not serialize: ", exception); - } - } -} diff --git 
hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/json/JSONCreateDatabaseMessage.java hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/json/JSONCreateDatabaseMessage.java deleted file mode 100644 index 9786210..0000000 --- hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/json/JSONCreateDatabaseMessage.java +++ /dev/null @@ -1,71 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.messaging.json; - -import org.apache.hcatalog.messaging.CreateDatabaseMessage; -import org.codehaus.jackson.annotate.JsonProperty; - -/** - * JSON Implementation of CreateDatabaseMessage. - */ -public class JSONCreateDatabaseMessage extends CreateDatabaseMessage { - - @JsonProperty - String server, servicePrincipal, db; - - @JsonProperty - Long timestamp; - - /** - * Default constructor, required for Jackson. 
- */ - public JSONCreateDatabaseMessage() {} - - public JSONCreateDatabaseMessage(String server, String servicePrincipal, String db, Long timestamp) { - this.server = server; - this.servicePrincipal = servicePrincipal; - this.db = db; - this.timestamp = timestamp; - checkValid(); - } - - @Override - public String getDB() { return db; } - - @Override - public String getServer() { return server; } - - @Override - public String getServicePrincipal() { return servicePrincipal; } - - @Override - public Long getTimestamp() { return timestamp; } - - @Override - public String toString() { - try { - return JSONMessageDeserializer.mapper.writeValueAsString(this); - } - catch (Exception exception) { - throw new IllegalArgumentException("Could not serialize: ", exception); - } - } - -} diff --git hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/json/JSONCreateTableMessage.java hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/json/JSONCreateTableMessage.java deleted file mode 100644 index c1f0017..0000000 --- hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/json/JSONCreateTableMessage.java +++ /dev/null @@ -1,74 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.messaging.json; - -import org.apache.hcatalog.messaging.CreateTableMessage; -import org.codehaus.jackson.annotate.JsonProperty; - -/** - * JSON implementation of CreateTableMessage. - */ -public class JSONCreateTableMessage extends CreateTableMessage { - - @JsonProperty - String server, servicePrincipal, db, table; - - @JsonProperty - Long timestamp; - - /** - * Default constructor, needed for Jackson. - */ - public JSONCreateTableMessage() {} - - public JSONCreateTableMessage(String server, String servicePrincipal, String db, String table, Long timestamp) { - this.server = server; - this.servicePrincipal = servicePrincipal; - this.db = db; - this.table = table; - this.timestamp = timestamp; - checkValid(); - } - - @Override - public String getServer() { return server; } - - @Override - public String getServicePrincipal() { return servicePrincipal; } - - @Override - public String getDB() { return db; } - - @Override - public Long getTimestamp() { return timestamp; } - - @Override - public String getTable() { return table; } - - @Override - public String toString() { - try { - return JSONMessageDeserializer.mapper.writeValueAsString(this); - } - catch (Exception exception) { - throw new IllegalArgumentException("Could not serialize: ", exception); - } - } -} diff --git hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/json/JSONDropDatabaseMessage.java hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/json/JSONDropDatabaseMessage.java deleted file mode 100644 index 544c1c7..0000000 --- hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/json/JSONDropDatabaseMessage.java +++ /dev/null @@ -1,71 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.messaging.json; - -import org.apache.hcatalog.messaging.DropDatabaseMessage; -import org.codehaus.jackson.annotate.JsonProperty; - -/** - * JSON implementation of DropDatabaseMessage. - */ -public class JSONDropDatabaseMessage extends DropDatabaseMessage { - - @JsonProperty - String server, servicePrincipal, db; - - @JsonProperty - Long timestamp; - - /** - * Default constructor, required for Jackson. 
- */ - public JSONDropDatabaseMessage() {} - - public JSONDropDatabaseMessage(String server, String servicePrincipal, String db, Long timestamp) { - this.server = server; - this.servicePrincipal = servicePrincipal; - this.db = db; - this.timestamp = timestamp; - checkValid(); - } - - - @Override - public String getServer() { return server; } - - @Override - public String getServicePrincipal() { return servicePrincipal; } - - @Override - public String getDB() { return db; } - - @Override - public Long getTimestamp() { return timestamp; } - - @Override - public String toString() { - try { - return JSONMessageDeserializer.mapper.writeValueAsString(this); - } - catch (Exception exception) { - throw new IllegalArgumentException("Could not serialize: ", exception); - } - } -} diff --git hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/json/JSONDropPartitionMessage.java hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/json/JSONDropPartitionMessage.java deleted file mode 100644 index 0628364..0000000 --- hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/json/JSONDropPartitionMessage.java +++ /dev/null @@ -1,86 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.messaging.json; - -import org.apache.hcatalog.messaging.DropPartitionMessage; -import org.codehaus.jackson.annotate.JsonProperty; - -import java.util.List; -import java.util.Map; - -/** - * JSON implementation of DropPartitionMessage. - */ -public class JSONDropPartitionMessage extends DropPartitionMessage { - - @JsonProperty - String server, servicePrincipal, db, table; - - @JsonProperty - Long timestamp; - - @JsonProperty - List> partitions; - - /** - * Default Constructor. Required for Jackson. - */ - public JSONDropPartitionMessage() {} - - public JSONDropPartitionMessage(String server, String servicePrincipal, String db, String table, - List> partitions, Long timestamp) { - this.server = server; - this.servicePrincipal = servicePrincipal; - this.db = db; - this.table = table; - this.partitions = partitions; - this.timestamp = timestamp; - checkValid(); - } - - - @Override - public String getServer() { return server; } - - @Override - public String getServicePrincipal() { return servicePrincipal; } - - @Override - public String getDB() { return db; } - - @Override - public String getTable() { return table; } - - @Override - public Long getTimestamp() { return timestamp; } - - @Override - public List> getPartitions () { return partitions; } - - @Override - public String toString() { - try { - return JSONMessageDeserializer.mapper.writeValueAsString(this); - } - catch (Exception exception) { - throw new IllegalArgumentException("Could not serialize: ", exception); - } - } -} diff --git hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/json/JSONDropTableMessage.java hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/json/JSONDropTableMessage.java deleted file mode 100644 index fac6a11..0000000 --- 
hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/json/JSONDropTableMessage.java +++ /dev/null @@ -1,76 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.messaging.json; - -import org.apache.hcatalog.messaging.DropTableMessage; -import org.codehaus.jackson.annotate.JsonProperty; - -/** - * JSON implementation of DropTableMessage. - */ -public class JSONDropTableMessage extends DropTableMessage { - - @JsonProperty - String server, servicePrincipal, db, table; - - @JsonProperty - Long timestamp; - - /** - * Default constructor, needed for Jackson. 
- */ - public JSONDropTableMessage() {} - - public JSONDropTableMessage(String server, String servicePrincipal, String db, String table, Long timestamp) { - this.server = server; - this.servicePrincipal = servicePrincipal; - this.db = db; - this.table = table; - this.timestamp = timestamp; - checkValid(); - } - - - @Override - public String getTable() { return table; } - - @Override - public String getServer() { return server; } - - @Override - public String getServicePrincipal() { return servicePrincipal; } - - @Override - public String getDB() { return db; } - - @Override - public Long getTimestamp() { return timestamp; } - - @Override - public String toString() { - try { - return JSONMessageDeserializer.mapper.writeValueAsString(this); - } - catch (Exception exception) { - throw new IllegalArgumentException("Could not serialize: ", exception); - } - } - -} diff --git hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/json/JSONMessageDeserializer.java hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/json/JSONMessageDeserializer.java deleted file mode 100644 index d93b880..0000000 --- hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/json/JSONMessageDeserializer.java +++ /dev/null @@ -1,102 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.messaging.json; - -import org.apache.hcatalog.messaging.AddPartitionMessage; -import org.apache.hcatalog.messaging.CreateDatabaseMessage; -import org.apache.hcatalog.messaging.CreateTableMessage; -import org.apache.hcatalog.messaging.DropDatabaseMessage; -import org.apache.hcatalog.messaging.DropPartitionMessage; -import org.apache.hcatalog.messaging.DropTableMessage; -import org.apache.hcatalog.messaging.MessageDeserializer; -import org.codehaus.jackson.map.DeserializationConfig; -import org.codehaus.jackson.map.ObjectMapper; - -/** - * MessageDeserializer implementation, for deserializing from JSON strings. - */ -public class JSONMessageDeserializer extends MessageDeserializer { - - static ObjectMapper mapper = new ObjectMapper(); // Thread-safe. - - static { - mapper.configure(DeserializationConfig.Feature.FAIL_ON_UNKNOWN_PROPERTIES, false); - } - - @Override - public CreateDatabaseMessage getCreateDatabaseMessage(String messageBody) { - try { - return mapper.readValue(messageBody, JSONCreateDatabaseMessage.class); - } - catch (Exception exception) { - throw new IllegalArgumentException("Could not construct JSONCreateDatabaseMessage.", exception); - } - } - - @Override - public DropDatabaseMessage getDropDatabaseMessage(String messageBody) { - try { - return mapper.readValue(messageBody, JSONDropDatabaseMessage.class); - } - catch (Exception exception) { - throw new IllegalArgumentException("Could not construct JSONDropDatabaseMessage.", exception); - } - } - - @Override - public CreateTableMessage getCreateTableMessage(String messageBody) { - try { - return mapper.readValue(messageBody, JSONCreateTableMessage.class); - } - catch (Exception exception) { - throw new IllegalArgumentException("Could not construct JSONCreateTableMessage.", exception); - } - } - - @Override - public DropTableMessage getDropTableMessage(String 
messageBody) { - try { - return mapper.readValue(messageBody, JSONDropTableMessage.class); - } - catch (Exception exception) { - throw new IllegalArgumentException("Could not construct JSONDropTableMessage.", exception); - } - } - - @Override - public AddPartitionMessage getAddPartitionMessage(String messageBody) { - try { - return mapper.readValue(messageBody, JSONAddPartitionMessage.class); - } - catch (Exception exception) { - throw new IllegalArgumentException("Could not construct AddPartitionMessage.", exception); - } - } - - @Override - public DropPartitionMessage getDropPartitionMessage(String messageBody) { - try { - return mapper.readValue(messageBody, JSONDropPartitionMessage.class); - } - catch (Exception exception) { - throw new IllegalArgumentException("Could not construct DropPartitionMessage.", exception); - } - } -} diff --git hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/json/JSONMessageFactory.java hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/json/JSONMessageFactory.java deleted file mode 100644 index 7639ad9..0000000 --- hcatalog/server-extensions/src/main/java/org/apache/hcatalog/messaging/json/JSONMessageFactory.java +++ /dev/null @@ -1,106 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.messaging.json; - -import org.apache.hadoop.hive.metastore.api.Database; -import org.apache.hadoop.hive.metastore.api.Partition; -import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.hcatalog.messaging.AddPartitionMessage; -import org.apache.hcatalog.messaging.CreateDatabaseMessage; -import org.apache.hcatalog.messaging.CreateTableMessage; -import org.apache.hcatalog.messaging.DropDatabaseMessage; -import org.apache.hcatalog.messaging.DropPartitionMessage; -import org.apache.hcatalog.messaging.DropTableMessage; -import org.apache.hcatalog.messaging.MessageDeserializer; -import org.apache.hcatalog.messaging.MessageFactory; - -import java.util.Arrays; -import java.util.LinkedHashMap; -import java.util.Map; - -/** - * The JSON implementation of the MessageFactory. Constructs JSON implementations of - * each message-type. - */ -public class JSONMessageFactory extends MessageFactory { - - private static JSONMessageDeserializer deserializer = new JSONMessageDeserializer(); - - @Override - public MessageDeserializer getDeserializer() { - return deserializer; - } - - @Override - public String getVersion() { - return "0.1"; - } - - @Override - public String getMessageFormat() { - return "json"; - } - - @Override - public CreateDatabaseMessage buildCreateDatabaseMessage(Database db) { - return new JSONCreateDatabaseMessage(HCAT_SERVER_URL, HCAT_SERVICE_PRINCIPAL, db.getName(), - System.currentTimeMillis() / 1000); - } - - @Override - public DropDatabaseMessage buildDropDatabaseMessage(Database db) { - return new JSONDropDatabaseMessage(HCAT_SERVER_URL, HCAT_SERVICE_PRINCIPAL, db.getName(), - System.currentTimeMillis() / 1000); - } - - @Override - public CreateTableMessage buildCreateTableMessage(Table table) { - return new JSONCreateTableMessage(HCAT_SERVER_URL, HCAT_SERVICE_PRINCIPAL, table.getDbName(), - 
table.getTableName(), System.currentTimeMillis()/1000); - } - - @Override - public DropTableMessage buildDropTableMessage(Table table) { - return new JSONDropTableMessage(HCAT_SERVER_URL, HCAT_SERVICE_PRINCIPAL, table.getDbName(), table.getTableName(), - System.currentTimeMillis()/1000); - } - - @Override - public AddPartitionMessage buildAddPartitionMessage(Table table, Partition partition) { - return new JSONAddPartitionMessage(HCAT_SERVER_URL, HCAT_SERVICE_PRINCIPAL, partition.getDbName(), - partition.getTableName(), Arrays.asList(getPartitionKeyValues(table, partition)), - System.currentTimeMillis()/1000); - } - - @Override - public DropPartitionMessage buildDropPartitionMessage(Table table, Partition partition) { - return new JSONDropPartitionMessage(HCAT_SERVER_URL, HCAT_SERVICE_PRINCIPAL, partition.getDbName(), - partition.getTableName(), Arrays.asList(getPartitionKeyValues(table, partition)), - System.currentTimeMillis()/1000); - } - - private static Map getPartitionKeyValues(Table table, Partition partition) { - Map partitionKeys = new LinkedHashMap(); - for (int i=0; i session = new ThreadLocal() { + @Override + protected Session initialValue() { + try { + return createSession(); + } catch (Exception e) { + LOG.error("Couldn't create JMS Session", e); + return null; + } + } + + @Override + public void remove() { + if (get() != null) { + try { + get().close(); + } catch (Exception e) { + LOG.error("Unable to close bad JMS session, ignored error", e); + } + } + super.remove(); + } + }; + + /** + * Create message bus connection and session in constructor. 
+ */ + public NotificationListener(final Configuration conf) { + super(conf); + testAndCreateConnection(); + } + + private static String getTopicName(Partition partition, + ListenerEvent partitionEvent) throws MetaException { + try { + return partitionEvent.getHandler() + .get_table(partition.getDbName(), partition.getTableName()) + .getParameters().get(HCatConstants.HCAT_MSGBUS_TOPIC_NAME); + } catch (NoSuchObjectException e) { + throw new MetaException(e.toString()); + } + } + + @Override + public void onAddPartition(AddPartitionEvent partitionEvent) + throws MetaException { + // Subscriber can get notification of newly add partition in a + // particular table by listening on a topic named "dbName.tableName" + // and message selector string as "HCAT_EVENT = HCAT_ADD_PARTITION" + if (partitionEvent.getStatus()) { + + Partition partition = partitionEvent.getPartition(); + String topicName = getTopicName(partition, partitionEvent); + if (topicName != null && !topicName.equals("")) { + send(messageFactory.buildAddPartitionMessage(partitionEvent.getTable(), partition), topicName); + } else { + LOG.info("Topic name not found in metastore. Suppressing HCatalog notification for " + + partition.getDbName() + + "." + + partition.getTableName() + + " To enable notifications for this table, please do alter table set properties (" + + HCatConstants.HCAT_MSGBUS_TOPIC_NAME + + "=.) or whatever you want topic name to be."); + } + } + + } + + /** + * Send dropped partition notifications. Subscribers can receive these notifications for a + * particular table by listening on a topic named "dbName.tableName" with message selector + * string {@value org.apache.hive.hcatalog.common.HCatConstants#HCAT_EVENT} = + * {@value org.apache.hive.hcatalog.common.HCatConstants#HCAT_DROP_PARTITION_EVENT}. + *
+ * TODO: DataNucleus 2.0.3, currently used by the HiveMetaStore for persistence, has been + * found to throw NPE when serializing objects that contain null. For this reason we override + * some fields in the StorageDescriptor of this notification. This should be fixed after + * HIVE-2084 "Upgrade datanucleus from 2.0.3 to 3.0.1" is resolved. + */ + @Override + public void onDropPartition(DropPartitionEvent partitionEvent) throws MetaException { + if (partitionEvent.getStatus()) { + Partition partition = partitionEvent.getPartition(); + StorageDescriptor sd = partition.getSd(); + sd.setBucketCols(new ArrayList()); + sd.setSortCols(new ArrayList()); + sd.setParameters(new HashMap()); + sd.getSerdeInfo().setParameters(new HashMap()); + sd.getSkewedInfo().setSkewedColNames(new ArrayList()); + String topicName = getTopicName(partition, partitionEvent); + if (topicName != null && !topicName.equals("")) { + send(messageFactory.buildDropPartitionMessage(partitionEvent.getTable(), partition), topicName); + } else { + LOG.info("Topic name not found in metastore. Suppressing HCatalog notification for " + + partition.getDbName() + + "." + + partition.getTableName() + + " To enable notifications for this table, please do alter table set properties (" + + HCatConstants.HCAT_MSGBUS_TOPIC_NAME + + "=.) 
or whatever you want topic name to be."); + } + } + } + + @Override + public void onCreateDatabase(CreateDatabaseEvent dbEvent) + throws MetaException { + // Subscriber can get notification about addition of a database in HCAT + // by listening on a topic named "HCAT" and message selector string + // as "HCAT_EVENT = HCAT_ADD_DATABASE" + if (dbEvent.getStatus()) { + String topicName = getTopicPrefix(dbEvent.getHandler().getHiveConf()); + send(messageFactory.buildCreateDatabaseMessage(dbEvent.getDatabase()), topicName); + } + } + + @Override + public void onDropDatabase(DropDatabaseEvent dbEvent) throws MetaException { + // Subscriber can get notification about drop of a database in HCAT + // by listening on a topic named "HCAT" and message selector string + // as "HCAT_EVENT = HCAT_DROP_DATABASE" + if (dbEvent.getStatus()) { + String topicName = getTopicPrefix(dbEvent.getHandler().getHiveConf()); + send(messageFactory.buildDropDatabaseMessage(dbEvent.getDatabase()), topicName); + } + } + + @Override + public void onCreateTable(CreateTableEvent tableEvent) throws MetaException { + // Subscriber can get notification about addition of a table in HCAT + // by listening on a topic named "HCAT" and message selector string + // as "HCAT_EVENT = HCAT_ADD_TABLE" + if (tableEvent.getStatus()) { + Table tbl = tableEvent.getTable(); + HMSHandler handler = tableEvent.getHandler(); + HiveConf conf = handler.getHiveConf(); + Table newTbl; + try { + newTbl = handler.get_table(tbl.getDbName(), tbl.getTableName()) + .deepCopy(); + newTbl.getParameters().put( + HCatConstants.HCAT_MSGBUS_TOPIC_NAME, + getTopicPrefix(conf) + "." + newTbl.getDbName().toLowerCase() + "." 
+ + newTbl.getTableName().toLowerCase()); + handler.alter_table(newTbl.getDbName(), newTbl.getTableName(), newTbl); + } catch (InvalidOperationException e) { + MetaException me = new MetaException(e.toString()); + me.initCause(e); + throw me; + } catch (NoSuchObjectException e) { + MetaException me = new MetaException(e.toString()); + me.initCause(e); + throw me; + } + String topicName = getTopicPrefix(conf) + "." + newTbl.getDbName().toLowerCase(); + send(messageFactory.buildCreateTableMessage(newTbl), topicName); + } + } + + private String getTopicPrefix(Configuration conf) { + return conf.get(HCatConstants.HCAT_MSGBUS_TOPIC_PREFIX, + HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX); + } + + /** + * Send dropped table notifications. Subscribers can receive these notifications for + * dropped tables by listening on topic "HCAT" with message selector string + * {@value org.apache.hive.hcatalog.common.HCatConstants#HCAT_EVENT} = + * {@value org.apache.hive.hcatalog.common.HCatConstants#HCAT_DROP_TABLE_EVENT} + *
+ * TODO: DataNucleus 2.0.3, currently used by the HiveMetaStore for persistence, has been + * found to throw NPE when serializing objects that contain null. For this reason we override + * some fields in the StorageDescriptor of this notification. This should be fixed after + * HIVE-2084 "Upgrade datanucleus from 2.0.3 to 3.0.1" is resolved. + */ + @Override + public void onDropTable(DropTableEvent tableEvent) throws MetaException { + // Subscriber can get notification about drop of a table in HCAT + // by listening on a topic named "HCAT" and message selector string + // as "HCAT_EVENT = HCAT_DROP_TABLE" + + // Datanucleus throws NPE when we try to serialize a table object + // retrieved from metastore. To workaround that we reset following objects + + if (tableEvent.getStatus()) { + Table table = tableEvent.getTable(); + String topicName = getTopicPrefix(tableEvent.getHandler().getHiveConf()) + "." + table.getDbName().toLowerCase(); + send(messageFactory.buildDropTableMessage(table), topicName); + } + } + + /** + * @param hCatEventMessage The HCatEventMessage being sent over JMS. + * @param topicName is the name on message broker on which message is sent. + */ + protected void send(HCatEventMessage hCatEventMessage, String topicName) { + send(hCatEventMessage, topicName, NUM_RETRIES); + } + + /** + * @param hCatEventMessage The HCatEventMessage being sent over JMS, this method is threadsafe + * @param topicName is the name on message broker on which message is sent. 
+ * @param retries the number of retry attempts + */ + protected void send(HCatEventMessage hCatEventMessage, String topicName, int retries) { + try { + if (session.get() == null) { + // Need to reconnect + throw new JMSException("Invalid JMS session"); + } + Destination topic = createTopic(topicName); + Message msg = session.get().createTextMessage(hCatEventMessage.toString()); + + msg.setStringProperty(HCatConstants.HCAT_EVENT, hCatEventMessage.getEventType().toString()); + msg.setStringProperty(HCatConstants.HCAT_MESSAGE_VERSION, messageFactory.getVersion()); + msg.setStringProperty(HCatConstants.HCAT_MESSAGE_FORMAT, messageFactory.getMessageFormat()); + MessageProducer producer = createProducer(topic); + producer.send(msg); + // Message must be transacted before we return. + session.get().commit(); + } catch (Exception e) { + if (retries >= 0) { + // this may happen if we were able to establish connection once, but its no longer valid + LOG.error("Seems like connection is lost. Will retry. Retries left : " + retries + ". error was:", e); + testAndCreateConnection(); + send(hCatEventMessage, topicName, retries - 1); + } else { + // Gobble up the exception. Message delivery is best effort. + LOG.error("Failed to send message on topic: " + topicName + + " event: " + hCatEventMessage.getEventType() + " after retries: " + NUM_RETRIES, e); + } + } + } + + /** + * Get the topic object for the topicName + * + * @param topicName The String identifying the message-topic. + * @return A {@link Topic} object corresponding to the specified topicName. + * @throws JMSException + */ + protected Topic createTopic(final String topicName) throws JMSException { + return session.get().createTopic(topicName); + } + + /** + * Does a health check on the connection by sending a dummy message. 
+ * Create the connection if the connection is found to be bad + * Also recreates the session + */ + protected synchronized void testAndCreateConnection() { + if (conn != null) { + // This method is reached when error occurs while sending msg, so the session must be bad + session.remove(); + if (!isConnectionHealthy()) { + // I am the first thread to detect the error, cleanup old connection & reconnect + try { + conn.close(); + } catch (Exception e) { + LOG.error("Unable to close bad JMS connection, ignored error", e); + } + conn = createConnection(); + } + } else { + conn = createConnection(); + } + try { + session.set(createSession()); + } catch (JMSException e) { + LOG.error("Couldn't create JMS session, ignored the error", e); + } + } + + /** + * Create the JMS connection + * @return newly created JMS connection + */ + protected Connection createConnection() { + LOG.info("Will create new JMS connection"); + Context jndiCntxt; + Connection jmsConnection = null; + try { + jndiCntxt = new InitialContext(); + ConnectionFactory connFac = (ConnectionFactory) jndiCntxt.lookup("ConnectionFactory"); + jmsConnection = connFac.createConnection(); + jmsConnection.start(); + jmsConnection.setExceptionListener(new ExceptionListener() { + @Override + public void onException(JMSException jmse) { + LOG.error("JMS Exception listener received exception. Ignored the error", jmse); + } + }); + } catch (NamingException e) { + LOG.error("JNDI error while setting up Message Bus connection. 
" + + "Please make sure file named 'jndi.properties' is in " + + "classpath and contains appropriate key-value pairs.", e); + } catch (JMSException e) { + LOG.error("Failed to initialize connection to message bus", e); + } catch (Throwable t) { + LOG.error("Unable to connect to JMS provider", t); + } + return jmsConnection; + } + + /** + * Send a dummy message to probe if the JMS connection is healthy + * @return true if connection is healthy, false otherwise + */ + protected boolean isConnectionHealthy() { + try { + Topic topic = createTopic(getTopicPrefix(getConf()) + "." + HEALTH_CHECK_TOPIC_SUFFIX); + MessageProducer producer = createProducer(topic); + Message msg = session.get().createTextMessage(HEALTH_CHECK_MSG); + producer.send(msg, DeliveryMode.NON_PERSISTENT, 4, 0); + } catch (Exception e) { + return false; + } + return true; + } + + /** + * Creates a JMS session + * @return newly create JMS session + * @throws JMSException + */ + protected Session createSession() throws JMSException { + // We want message to be sent when session commits, thus we run in + // transacted mode. + return conn.createSession(true, Session.SESSION_TRANSACTED); + } + + /** + * Create a JMS producer + * @param topic + * @return newly created message producer + * @throws JMSException + */ + protected MessageProducer createProducer(Destination topic) throws JMSException { + return session.get().createProducer(topic); + } + + @Override + protected void finalize() throws Throwable { + if (conn != null) { + try { + conn.close(); + } catch (Exception e) { + LOG.error("Couldn't close jms connection, ignored the error", e); + } + } + } + + @Override + public void onLoadPartitionDone(LoadPartitionDoneEvent lpde) + throws MetaException { +// TODO: Fix LoadPartitionDoneEvent. Currently, LPDE can only carry a single partition-spec. And that defeats the purpose. 
+// if(lpde.getStatus()) +// send(lpde.getPartitionName(),lpde.getTable().getParameters().get(HCatConstants.HCAT_MSGBUS_TOPIC_NAME),HCatConstants.HCAT_PARTITION_DONE_EVENT); + } + + @Override + public void onAlterPartition(AlterPartitionEvent ape) throws MetaException { + // no-op + } + + @Override + public void onAlterTable(AlterTableEvent ate) throws MetaException { + // no-op + } +} diff --git hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/AddPartitionMessage.java hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/AddPartitionMessage.java new file mode 100644 index 0000000..786380b --- /dev/null +++ hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/AddPartitionMessage.java @@ -0,0 +1,54 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.messaging; + +import java.util.List; +import java.util.Map; + +/** + * The HCat message sent when partition(s) are added to a table. + */ +public abstract class AddPartitionMessage extends HCatEventMessage { + + protected AddPartitionMessage() { + super(EventType.ADD_PARTITION); + } + + /** + * Getter for name of table (where partitions are added). 
+ * @return Table-name (String). + */ + public abstract String getTable(); + + /** + * Getter for list of partitions added. + * @return List of maps, where each map identifies values for each partition-key, for every added partition. + */ + public abstract List> getPartitions (); + + @Override + public HCatEventMessage checkValid() { + if (getTable() == null) + throw new IllegalStateException("Table name unset."); + if (getPartitions() == null) + throw new IllegalStateException("Partition-list unset."); + return super.checkValid(); + } +} diff --git hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/CreateDatabaseMessage.java hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/CreateDatabaseMessage.java new file mode 100644 index 0000000..380890f --- /dev/null +++ hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/CreateDatabaseMessage.java @@ -0,0 +1,31 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.messaging; + +/** + * HCat message sent when a Database is created in HCatalog. 
+ */ +public abstract class CreateDatabaseMessage extends HCatEventMessage { + + protected CreateDatabaseMessage() { + super(EventType.CREATE_DATABASE); + } + +} diff --git hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/CreateTableMessage.java hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/CreateTableMessage.java new file mode 100644 index 0000000..240c3b8 --- /dev/null +++ hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/CreateTableMessage.java @@ -0,0 +1,43 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.messaging; + +/** + * HCat message sent when a table is created in HCatalog. + */ +public abstract class CreateTableMessage extends HCatEventMessage { + + protected CreateTableMessage() { + super(EventType.CREATE_TABLE); + } + + /** + * Getter for the name of table created in HCatalog. + * @return Table-name (String). 
+ */ + public abstract String getTable(); + + @Override + public HCatEventMessage checkValid() { + if (getTable() == null) + throw new IllegalStateException("Table name unset."); + return super.checkValid(); + } +} diff --git hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/DropDatabaseMessage.java hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/DropDatabaseMessage.java new file mode 100644 index 0000000..a0279ef --- /dev/null +++ hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/DropDatabaseMessage.java @@ -0,0 +1,30 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.messaging; + +/** + * HCat message sent when a Database is dropped from HCatalog. 
+ */ +public abstract class DropDatabaseMessage extends HCatEventMessage { + + protected DropDatabaseMessage() { + super(EventType.DROP_DATABASE); + } +} diff --git hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/DropPartitionMessage.java hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/DropPartitionMessage.java new file mode 100644 index 0000000..57bf455 --- /dev/null +++ hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/DropPartitionMessage.java @@ -0,0 +1,45 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.messaging; + +import java.util.List; +import java.util.Map; + +/** + * HCat message sent when a partition is dropped in HCatalog. 
+ */ +public abstract class DropPartitionMessage extends HCatEventMessage { + + protected DropPartitionMessage() { + super(EventType.DROP_PARTITION); + } + + public abstract String getTable(); + public abstract List<Map<String, String>> getPartitions (); + + @Override + public HCatEventMessage checkValid() { + if (getTable() == null) + throw new IllegalStateException("Table name unset."); + if (getPartitions() == null) + throw new IllegalStateException("Partition-list unset."); + return super.checkValid(); + } +} diff --git hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/DropTableMessage.java hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/DropTableMessage.java new file mode 100644 index 0000000..4e6233e --- /dev/null +++ hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/DropTableMessage.java @@ -0,0 +1,43 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.messaging; + +/** + * HCat message sent when a Table is dropped in HCatalog. 
+ */ +public abstract class DropTableMessage extends HCatEventMessage { + + protected DropTableMessage() { + super(EventType.DROP_TABLE); + } + + /** + * Getter for the name of the table being dropped. + * @return Table-name (String). + */ + public abstract String getTable(); + + @Override + public HCatEventMessage checkValid() { + if (getTable() == null) + throw new IllegalStateException("Table name unset."); + return super.checkValid(); + } +} diff --git hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/HCatEventMessage.java hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/HCatEventMessage.java new file mode 100644 index 0000000..1afca14 --- /dev/null +++ hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/HCatEventMessage.java @@ -0,0 +1,100 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.messaging; + +import org.apache.hive.hcatalog.common.HCatConstants; + +/** + * Class representing messages emitted when Metastore operations are done. + * (E.g. Creation and deletion of databases, tables and partitions.) 
+ */ +public abstract class HCatEventMessage { + + /** + * Enumeration of all supported types of Metastore operations. + */ + public static enum EventType { + + CREATE_DATABASE(HCatConstants.HCAT_CREATE_DATABASE_EVENT), + DROP_DATABASE(HCatConstants.HCAT_DROP_DATABASE_EVENT), + CREATE_TABLE(HCatConstants.HCAT_CREATE_TABLE_EVENT), + DROP_TABLE(HCatConstants.HCAT_DROP_TABLE_EVENT), + ADD_PARTITION(HCatConstants.HCAT_ADD_PARTITION_EVENT), + DROP_PARTITION(HCatConstants.HCAT_DROP_PARTITION_EVENT); + + private String typeString; + + EventType(String typeString) { + this.typeString = typeString; + } + + @Override + public String toString() { return typeString; } + } + + protected EventType eventType; + + protected HCatEventMessage(EventType eventType) { + this.eventType = eventType; + } + + public EventType getEventType() { + return eventType; + } + + /** + * Getter for HCatalog Server's URL. + * (This is where the event originates from.) + * @return HCatalog Server's URL (String). + */ + public abstract String getServer(); + + /** + * Getter for the Kerberos principal of the HCatalog service. + * @return HCatalog Service Principal (String). + */ + public abstract String getServicePrincipal(); + + /** + * Getter for the name of the Database on which the Metastore operation is done. + * @return Database-name (String). + */ + public abstract String getDB(); + + /** + * Getter for the timestamp associated with the operation. + * @return Timestamp (Long - seconds since epoch). + */ + public abstract Long getTimestamp(); + + /** + * Class invariant. Checked after construction or deserialization. 
+ */ + public HCatEventMessage checkValid() { + if (getServer() == null || getServicePrincipal() == null) + throw new IllegalStateException("Server-URL/Service-Principal shouldn't be null."); + if (getEventType() == null) + throw new IllegalStateException("Event-type unset."); + if (getDB() == null) + throw new IllegalArgumentException("DB-name unset."); + + return this; + } +} diff --git hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/MessageDeserializer.java hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/MessageDeserializer.java new file mode 100644 index 0000000..1f4ba08 --- /dev/null +++ hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/MessageDeserializer.java @@ -0,0 +1,83 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.messaging; + +/** + * Interface for converting HCat events from String-form back to HCatEventMessage instances. + */ +public abstract class MessageDeserializer { + + /** + * Method to construct HCatEventMessage from string. 
+ */ + public HCatEventMessage getHCatEventMessage(String eventTypeString, String messageBody) { + + switch (HCatEventMessage.EventType.valueOf(eventTypeString)) { + case CREATE_DATABASE: + return getCreateDatabaseMessage(messageBody); + case DROP_DATABASE: + return getDropDatabaseMessage(messageBody); + case CREATE_TABLE: + return getCreateTableMessage(messageBody); + case DROP_TABLE: + return getDropTableMessage(messageBody); + case ADD_PARTITION: + return getAddPartitionMessage(messageBody); + case DROP_PARTITION: + return getDropPartitionMessage(messageBody); + + default: + throw new IllegalArgumentException("Unsupported event-type: " + eventTypeString); + } + } + + /** + * Method to de-serialize CreateDatabaseMessage instance. + */ + public abstract CreateDatabaseMessage getCreateDatabaseMessage(String messageBody); + + /** + * Method to de-serialize DropDatabaseMessage instance. + */ + public abstract DropDatabaseMessage getDropDatabaseMessage(String messageBody); + + /** + * Method to de-serialize CreateTableMessage instance. + */ + public abstract CreateTableMessage getCreateTableMessage(String messageBody); + + /** + * Method to de-serialize DropTableMessage instance. + */ + public abstract DropTableMessage getDropTableMessage(String messageBody); + + /** + * Method to de-serialize AddPartitionMessage instance. + */ + public abstract AddPartitionMessage getAddPartitionMessage(String messageBody); + + /** + * Method to de-serialize DropPartitionMessage instance. + */ + public abstract DropPartitionMessage getDropPartitionMessage(String messageBody); + + // Protection against construction. 
+ protected MessageDeserializer() {} +} diff --git hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/MessageFactory.java hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/MessageFactory.java new file mode 100644 index 0000000..ff0ecdf --- /dev/null +++ hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/MessageFactory.java @@ -0,0 +1,138 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.messaging; + +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.api.Database; +import org.apache.hadoop.hive.metastore.api.Partition; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.util.ReflectionUtils; +import org.apache.hive.hcatalog.messaging.json.JSONMessageFactory; + +/** + * Abstract Factory for the construction of HCatalog message instances. 
+ */ +public abstract class MessageFactory { + + private static MessageFactory instance = new JSONMessageFactory(); + + protected static final HiveConf hiveConf = new HiveConf(); + static { + hiveConf.addResource("hive-site.xml"); + } + + private static final String CONF_LABEL_HCAT_MESSAGE_FACTORY_IMPL_PREFIX = "hcatalog.message.factory.impl."; + private static final String CONF_LABEL_HCAT_MESSAGE_FORMAT = "hcatalog.message.format"; + private static final String HCAT_MESSAGE_FORMAT = hiveConf.get(CONF_LABEL_HCAT_MESSAGE_FORMAT, "json"); + private static final String DEFAULT_MESSAGE_FACTORY_IMPL = "org.apache.hive.hcatalog.messaging.json.JSONMessageFactory"; + private static final String HCAT_MESSAGE_FACTORY_IMPL = hiveConf.get(CONF_LABEL_HCAT_MESSAGE_FACTORY_IMPL_PREFIX + + HCAT_MESSAGE_FORMAT, + DEFAULT_MESSAGE_FACTORY_IMPL); + + protected static final String HCAT_SERVER_URL = hiveConf.get(HiveConf.ConfVars.METASTOREURIS.name(), ""); + protected static final String HCAT_SERVICE_PRINCIPAL = hiveConf.get(HiveConf.ConfVars.METASTORE_KERBEROS_PRINCIPAL.name(), ""); + + /** + * Getter for MessageFactory instance. + */ + public static MessageFactory getInstance() { + if (instance == null) { + instance = getInstance(HCAT_MESSAGE_FACTORY_IMPL); + } + return instance; + } + + private static MessageFactory getInstance(String className) { + try { + return (MessageFactory)ReflectionUtils.newInstance(Class.forName(className), hiveConf); + } + catch (ClassNotFoundException classNotFound) { + throw new IllegalStateException("Could not construct MessageFactory implementation: ", classNotFound); + } + } + + /** + * Getter for MessageDeserializer, corresponding to the specified format and version. + * @param format Serialization format for notifications. + * @param version Version of serialization format (currently ignored.) + * @return MessageDeserializer. 
+ */ + public static MessageDeserializer getDeserializer(String format, + String version) { + return getInstance(hiveConf.get(CONF_LABEL_HCAT_MESSAGE_FACTORY_IMPL_PREFIX + format, + DEFAULT_MESSAGE_FACTORY_IMPL)).getDeserializer(); + } + + public abstract MessageDeserializer getDeserializer(); + + /** + * Getter for version-string, corresponding to all constructed messages. + */ + public abstract String getVersion(); + + /** + * Getter for message-format. + */ + public abstract String getMessageFormat(); + + /** + * Factory method for CreateDatabaseMessage. + * @param db The Database being added. + * @return CreateDatabaseMessage instance. + */ + public abstract CreateDatabaseMessage buildCreateDatabaseMessage(Database db); + + /** + * Factory method for DropDatabaseMessage. + * @param db The Database being dropped. + * @return DropDatabaseMessage instance. + */ + public abstract DropDatabaseMessage buildDropDatabaseMessage(Database db); + + /** + * Factory method for CreateTableMessage. + * @param table The Table being created. + * @return CreateTableMessage instance. + */ + public abstract CreateTableMessage buildCreateTableMessage(Table table); + + /** + * Factory method for DropTableMessage. + * @param table The Table being dropped. + * @return DropTableMessage instance. + */ + public abstract DropTableMessage buildDropTableMessage(Table table); + + /** + * Factory method for AddPartitionMessage. + * @param table The Table to which the partition is added. + * @param partition The Partition being added. + * @return AddPartitionMessage instance. + */ + public abstract AddPartitionMessage buildAddPartitionMessage(Table table, Partition partition); + + /** + * Factory method for DropPartitionMessage. + * @param table The Table from which the partition is dropped. + * @param partition The Partition being dropped. + * @return DropPartitionMessage instance. 
+ */ + public abstract DropPartitionMessage buildDropPartitionMessage(Table table, Partition partition); +} diff --git hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/jms/MessagingUtils.java hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/jms/MessagingUtils.java new file mode 100644 index 0000000..65c4770 --- /dev/null +++ hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/jms/MessagingUtils.java @@ -0,0 +1,62 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.messaging.jms; + +import org.apache.commons.lang.StringUtils; +import org.apache.hive.hcatalog.common.HCatConstants; +import org.apache.hive.hcatalog.messaging.HCatEventMessage; +import org.apache.hive.hcatalog.messaging.MessageFactory; + +import javax.jms.JMSException; +import javax.jms.Message; +import javax.jms.TextMessage; + +/** + * Helper Utility to assist consumers of HCat Messages in extracting + * message-content from JMS messages. + */ +public class MessagingUtils { + + /** + * Method to return HCatEventMessage contained in the JMS message. 
+ * @param message The JMS Message instance + * @return The contained HCatEventMessage + */ + public static HCatEventMessage getMessage(Message message) { + try { + String messageBody = ((TextMessage)message).getText(); + String eventType = message.getStringProperty(HCatConstants.HCAT_EVENT); + String messageVersion = message.getStringProperty(HCatConstants.HCAT_MESSAGE_VERSION); + String messageFormat = message.getStringProperty(HCatConstants.HCAT_MESSAGE_FORMAT); + + if (StringUtils.isEmpty(messageBody) || StringUtils.isEmpty(eventType)) + throw new IllegalArgumentException("Could not extract HCatEventMessage. " + + "EventType and/or MessageBody is null/empty."); + + return MessageFactory.getDeserializer(messageFormat, messageVersion).getHCatEventMessage(eventType, messageBody); + } + catch (JMSException exception) { + throw new IllegalArgumentException("Could not extract HCatEventMessage. ", exception); + } + } + + // Prevent construction. + private MessagingUtils() {} +} diff --git hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/json/JSONAddPartitionMessage.java hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/json/JSONAddPartitionMessage.java new file mode 100644 index 0000000..19484fc --- /dev/null +++ hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/json/JSONAddPartitionMessage.java @@ -0,0 +1,85 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.messaging.json; + +import org.apache.hive.hcatalog.messaging.AddPartitionMessage; +import org.codehaus.jackson.annotate.JsonProperty; + +import java.util.List; +import java.util.Map; + +/** + * JSON implementation of AddPartitionMessage. + */ +public class JSONAddPartitionMessage extends AddPartitionMessage { + + @JsonProperty + String server, servicePrincipal, db, table; + + @JsonProperty + Long timestamp; + + @JsonProperty + List<Map<String, String>> partitions; + + /** + * Default Constructor. Required for Jackson. + */ + public JSONAddPartitionMessage() {} + + public JSONAddPartitionMessage(String server, String servicePrincipal, String db, String table, + List<Map<String, String>> partitions, Long timestamp) { + this.server = server; + this.servicePrincipal = servicePrincipal; + this.db = db; + this.table = table; + this.partitions = partitions; + this.timestamp = timestamp; + checkValid(); + } + + @Override + public String getServer() { return server; } + + @Override + public String getServicePrincipal() { return servicePrincipal; } + + @Override + public String getDB() { return db; } + + @Override + public String getTable() { return table; } + + @Override + public Long getTimestamp() { return timestamp; } + + @Override + public List<Map<String, String>> getPartitions () { return partitions; } + + @Override + public String toString() { + try { + return JSONMessageDeserializer.mapper.writeValueAsString(this); + } + catch (Exception exception) { + throw new IllegalArgumentException("Could not serialize: ", exception); + } + } +} diff --git 
hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/json/JSONCreateDatabaseMessage.java hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/json/JSONCreateDatabaseMessage.java new file mode 100644 index 0000000..1b11658 --- /dev/null +++ hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/json/JSONCreateDatabaseMessage.java @@ -0,0 +1,71 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.messaging.json; + +import org.apache.hive.hcatalog.messaging.CreateDatabaseMessage; +import org.codehaus.jackson.annotate.JsonProperty; + +/** + * JSON Implementation of CreateDatabaseMessage. + */ +public class JSONCreateDatabaseMessage extends CreateDatabaseMessage { + + @JsonProperty + String server, servicePrincipal, db; + + @JsonProperty + Long timestamp; + + /** + * Default constructor, required for Jackson. 
+ */ + public JSONCreateDatabaseMessage() {} + + public JSONCreateDatabaseMessage(String server, String servicePrincipal, String db, Long timestamp) { + this.server = server; + this.servicePrincipal = servicePrincipal; + this.db = db; + this.timestamp = timestamp; + checkValid(); + } + + @Override + public String getDB() { return db; } + + @Override + public String getServer() { return server; } + + @Override + public String getServicePrincipal() { return servicePrincipal; } + + @Override + public Long getTimestamp() { return timestamp; } + + @Override + public String toString() { + try { + return JSONMessageDeserializer.mapper.writeValueAsString(this); + } + catch (Exception exception) { + throw new IllegalArgumentException("Could not serialize: ", exception); + } + } + +} diff --git hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/json/JSONCreateTableMessage.java hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/json/JSONCreateTableMessage.java new file mode 100644 index 0000000..f7cc085 --- /dev/null +++ hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/json/JSONCreateTableMessage.java @@ -0,0 +1,74 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.messaging.json; + +import org.apache.hive.hcatalog.messaging.CreateTableMessage; +import org.codehaus.jackson.annotate.JsonProperty; + +/** + * JSON implementation of CreateTableMessage. + */ +public class JSONCreateTableMessage extends CreateTableMessage { + + @JsonProperty + String server, servicePrincipal, db, table; + + @JsonProperty + Long timestamp; + + /** + * Default constructor, needed for Jackson. + */ + public JSONCreateTableMessage() {} + + public JSONCreateTableMessage(String server, String servicePrincipal, String db, String table, Long timestamp) { + this.server = server; + this.servicePrincipal = servicePrincipal; + this.db = db; + this.table = table; + this.timestamp = timestamp; + checkValid(); + } + + @Override + public String getServer() { return server; } + + @Override + public String getServicePrincipal() { return servicePrincipal; } + + @Override + public String getDB() { return db; } + + @Override + public Long getTimestamp() { return timestamp; } + + @Override + public String getTable() { return table; } + + @Override + public String toString() { + try { + return JSONMessageDeserializer.mapper.writeValueAsString(this); + } + catch (Exception exception) { + throw new IllegalArgumentException("Could not serialize: ", exception); + } + } +} diff --git hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/json/JSONDropDatabaseMessage.java hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/json/JSONDropDatabaseMessage.java new file mode 100644 index 0000000..bf8d8c7 --- /dev/null +++ hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/json/JSONDropDatabaseMessage.java @@ -0,0 +1,71 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.messaging.json; + +import org.apache.hive.hcatalog.messaging.DropDatabaseMessage; +import org.codehaus.jackson.annotate.JsonProperty; + +/** + * JSON implementation of DropDatabaseMessage. + */ +public class JSONDropDatabaseMessage extends DropDatabaseMessage { + + @JsonProperty + String server, servicePrincipal, db; + + @JsonProperty + Long timestamp; + + /** + * Default constructor, required for Jackson. 
+ */ + public JSONDropDatabaseMessage() {} + + public JSONDropDatabaseMessage(String server, String servicePrincipal, String db, Long timestamp) { + this.server = server; + this.servicePrincipal = servicePrincipal; + this.db = db; + this.timestamp = timestamp; + checkValid(); + } + + + @Override + public String getServer() { return server; } + + @Override + public String getServicePrincipal() { return servicePrincipal; } + + @Override + public String getDB() { return db; } + + @Override + public Long getTimestamp() { return timestamp; } + + @Override + public String toString() { + try { + return JSONMessageDeserializer.mapper.writeValueAsString(this); + } + catch (Exception exception) { + throw new IllegalArgumentException("Could not serialize: ", exception); + } + } +} diff --git hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/json/JSONDropPartitionMessage.java hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/json/JSONDropPartitionMessage.java new file mode 100644 index 0000000..500d75a --- /dev/null +++ hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/json/JSONDropPartitionMessage.java @@ -0,0 +1,86 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.messaging.json; + +import org.apache.hive.hcatalog.messaging.DropPartitionMessage; +import org.codehaus.jackson.annotate.JsonProperty; + +import java.util.List; +import java.util.Map; + +/** + * JSON implementation of DropPartitionMessage. + */ +public class JSONDropPartitionMessage extends DropPartitionMessage { + + @JsonProperty + String server, servicePrincipal, db, table; + + @JsonProperty + Long timestamp; + + @JsonProperty + List<Map<String, String>> partitions; + + /** + * Default Constructor. Required for Jackson. + */ + public JSONDropPartitionMessage() {} + + public JSONDropPartitionMessage(String server, String servicePrincipal, String db, String table, + List<Map<String, String>> partitions, Long timestamp) { + this.server = server; + this.servicePrincipal = servicePrincipal; + this.db = db; + this.table = table; + this.partitions = partitions; + this.timestamp = timestamp; + checkValid(); + } + + + @Override + public String getServer() { return server; } + + @Override + public String getServicePrincipal() { return servicePrincipal; } + + @Override + public String getDB() { return db; } + + @Override + public String getTable() { return table; } + + @Override + public Long getTimestamp() { return timestamp; } + + @Override + public List<Map<String, String>> getPartitions () { return partitions; } + + @Override + public String toString() { + try { + return JSONMessageDeserializer.mapper.writeValueAsString(this); + } + catch (Exception exception) { + throw new IllegalArgumentException("Could not serialize: ", exception); + } + } +} diff --git hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/json/JSONDropTableMessage.java hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/json/JSONDropTableMessage.java new file mode 100644 index 0000000..3c19a64 --- /dev/null +++ 
hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/json/JSONDropTableMessage.java @@ -0,0 +1,76 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.messaging.json; + +import org.apache.hive.hcatalog.messaging.DropTableMessage; +import org.codehaus.jackson.annotate.JsonProperty; + +/** + * JSON implementation of DropTableMessage. + */ +public class JSONDropTableMessage extends DropTableMessage { + + @JsonProperty + String server, servicePrincipal, db, table; + + @JsonProperty + Long timestamp; + + /** + * Default constructor, needed for Jackson. 
+ */ + public JSONDropTableMessage() {} + + public JSONDropTableMessage(String server, String servicePrincipal, String db, String table, Long timestamp) { + this.server = server; + this.servicePrincipal = servicePrincipal; + this.db = db; + this.table = table; + this.timestamp = timestamp; + checkValid(); + } + + + @Override + public String getTable() { return table; } + + @Override + public String getServer() { return server; } + + @Override + public String getServicePrincipal() { return servicePrincipal; } + + @Override + public String getDB() { return db; } + + @Override + public Long getTimestamp() { return timestamp; } + + @Override + public String toString() { + try { + return JSONMessageDeserializer.mapper.writeValueAsString(this); + } + catch (Exception exception) { + throw new IllegalArgumentException("Could not serialize: ", exception); + } + } + +} diff --git hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/json/JSONMessageDeserializer.java hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/json/JSONMessageDeserializer.java new file mode 100644 index 0000000..018f35c --- /dev/null +++ hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/json/JSONMessageDeserializer.java @@ -0,0 +1,102 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.messaging.json; + +import org.apache.hive.hcatalog.messaging.AddPartitionMessage; +import org.apache.hive.hcatalog.messaging.CreateDatabaseMessage; +import org.apache.hive.hcatalog.messaging.CreateTableMessage; +import org.apache.hive.hcatalog.messaging.DropDatabaseMessage; +import org.apache.hive.hcatalog.messaging.DropPartitionMessage; +import org.apache.hive.hcatalog.messaging.DropTableMessage; +import org.apache.hive.hcatalog.messaging.MessageDeserializer; +import org.codehaus.jackson.map.DeserializationConfig; +import org.codehaus.jackson.map.ObjectMapper; + +/** + * MessageDeserializer implementation, for deserializing from JSON strings. + */ +public class JSONMessageDeserializer extends MessageDeserializer { + + static ObjectMapper mapper = new ObjectMapper(); // Thread-safe. 
+ + static { + mapper.configure(DeserializationConfig.Feature.FAIL_ON_UNKNOWN_PROPERTIES, false); + } + + @Override + public CreateDatabaseMessage getCreateDatabaseMessage(String messageBody) { + try { + return mapper.readValue(messageBody, JSONCreateDatabaseMessage.class); + } + catch (Exception exception) { + throw new IllegalArgumentException("Could not construct JSONCreateDatabaseMessage.", exception); + } + } + + @Override + public DropDatabaseMessage getDropDatabaseMessage(String messageBody) { + try { + return mapper.readValue(messageBody, JSONDropDatabaseMessage.class); + } + catch (Exception exception) { + throw new IllegalArgumentException("Could not construct JSONDropDatabaseMessage.", exception); + } + } + + @Override + public CreateTableMessage getCreateTableMessage(String messageBody) { + try { + return mapper.readValue(messageBody, JSONCreateTableMessage.class); + } + catch (Exception exception) { + throw new IllegalArgumentException("Could not construct JSONCreateTableMessage.", exception); + } + } + + @Override + public DropTableMessage getDropTableMessage(String messageBody) { + try { + return mapper.readValue(messageBody, JSONDropTableMessage.class); + } + catch (Exception exception) { + throw new IllegalArgumentException("Could not construct JSONDropTableMessage.", exception); + } + } + + @Override + public AddPartitionMessage getAddPartitionMessage(String messageBody) { + try { + return mapper.readValue(messageBody, JSONAddPartitionMessage.class); + } + catch (Exception exception) { + throw new IllegalArgumentException("Could not construct AddPartitionMessage.", exception); + } + } + + @Override + public DropPartitionMessage getDropPartitionMessage(String messageBody) { + try { + return mapper.readValue(messageBody, JSONDropPartitionMessage.class); + } + catch (Exception exception) { + throw new IllegalArgumentException("Could not construct DropPartitionMessage.", exception); + } + } +} diff --git 
hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/json/JSONMessageFactory.java hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/json/JSONMessageFactory.java new file mode 100644 index 0000000..92d39ed --- /dev/null +++ hcatalog/server-extensions/src/main/java/org/apache/hive/hcatalog/messaging/json/JSONMessageFactory.java @@ -0,0 +1,106 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hive.hcatalog.messaging.json; + +import org.apache.hadoop.hive.metastore.api.Database; +import org.apache.hadoop.hive.metastore.api.Partition; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hive.hcatalog.messaging.AddPartitionMessage; +import org.apache.hive.hcatalog.messaging.CreateDatabaseMessage; +import org.apache.hive.hcatalog.messaging.CreateTableMessage; +import org.apache.hive.hcatalog.messaging.DropDatabaseMessage; +import org.apache.hive.hcatalog.messaging.DropPartitionMessage; +import org.apache.hive.hcatalog.messaging.DropTableMessage; +import org.apache.hive.hcatalog.messaging.MessageDeserializer; +import org.apache.hive.hcatalog.messaging.MessageFactory; + +import java.util.Arrays; +import java.util.LinkedHashMap; +import java.util.Map; + +/** + * The JSON implementation of the MessageFactory. Constructs JSON implementations of + * each message-type. + */ +public class JSONMessageFactory extends MessageFactory { + + private static JSONMessageDeserializer deserializer = new JSONMessageDeserializer(); + + @Override + public MessageDeserializer getDeserializer() { + return deserializer; + } + + @Override + public String getVersion() { + return "0.1"; + } + + @Override + public String getMessageFormat() { + return "json"; + } + + @Override + public CreateDatabaseMessage buildCreateDatabaseMessage(Database db) { + return new JSONCreateDatabaseMessage(HCAT_SERVER_URL, HCAT_SERVICE_PRINCIPAL, db.getName(), + System.currentTimeMillis() / 1000); + } + + @Override + public DropDatabaseMessage buildDropDatabaseMessage(Database db) { + return new JSONDropDatabaseMessage(HCAT_SERVER_URL, HCAT_SERVICE_PRINCIPAL, db.getName(), + System.currentTimeMillis() / 1000); + } + + @Override + public CreateTableMessage buildCreateTableMessage(Table table) { + return new JSONCreateTableMessage(HCAT_SERVER_URL, HCAT_SERVICE_PRINCIPAL, table.getDbName(), + table.getTableName(), System.currentTimeMillis()/1000); + } + + @Override 
+ public DropTableMessage buildDropTableMessage(Table table) { + return new JSONDropTableMessage(HCAT_SERVER_URL, HCAT_SERVICE_PRINCIPAL, table.getDbName(), table.getTableName(), + System.currentTimeMillis()/1000); + } + + @Override + public AddPartitionMessage buildAddPartitionMessage(Table table, Partition partition) { + return new JSONAddPartitionMessage(HCAT_SERVER_URL, HCAT_SERVICE_PRINCIPAL, partition.getDbName(), + partition.getTableName(), Arrays.asList(getPartitionKeyValues(table, partition)), + System.currentTimeMillis()/1000); + } + + @Override + public DropPartitionMessage buildDropPartitionMessage(Table table, Partition partition) { + return new JSONDropPartitionMessage(HCAT_SERVER_URL, HCAT_SERVICE_PRINCIPAL, partition.getDbName(), + partition.getTableName(), Arrays.asList(getPartitionKeyValues(table, partition)), + System.currentTimeMillis()/1000); + } + + private static Map getPartitionKeyValues(Table table, Partition partition) { + Map partitionKeys = new LinkedHashMap(); + for (int i=0; i actualMessages = new ArrayList(); - - @Before - public void setUp() throws Exception { - System.setProperty("java.naming.factory.initial", - "org.apache.activemq.jndi.ActiveMQInitialContextFactory"); - System.setProperty("java.naming.provider.url", - "vm://localhost?broker.persistent=false"); - ConnectionFactory connFac = new ActiveMQConnectionFactory( - "vm://localhost?broker.persistent=false"); - Connection conn = connFac.createConnection(); - conn.start(); - // We want message to be sent when session commits, thus we run in - // transacted mode. 
- Session session = conn.createSession(true, Session.SESSION_TRANSACTED); - Destination hcatTopic = session - .createTopic(HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX); - MessageConsumer consumer1 = session.createConsumer(hcatTopic); - consumer1.setMessageListener(this); - Destination tblTopic = session - .createTopic(HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX + ".mydb.mytbl"); - MessageConsumer consumer2 = session.createConsumer(tblTopic); - consumer2.setMessageListener(this); - Destination dbTopic = session - .createTopic(HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX + ".mydb"); - MessageConsumer consumer3 = session.createConsumer(dbTopic); - consumer3.setMessageListener(this); - - setUpHiveConf(); - hiveConf.set(ConfVars.METASTORE_EVENT_LISTENERS.varname, - NotificationListener.class.getName()); - SessionState.start(new CliSessionState(hiveConf)); - driver = new Driver(hiveConf); - client = new HiveMetaStoreClient(hiveConf); - } - - @After - public void tearDown() throws Exception { - List expectedMessages = Arrays.asList( - HCatConstants.HCAT_CREATE_DATABASE_EVENT, - HCatConstants.HCAT_CREATE_TABLE_EVENT, - HCatConstants.HCAT_ADD_PARTITION_EVENT, - HCatConstants.HCAT_DROP_PARTITION_EVENT, - HCatConstants.HCAT_DROP_TABLE_EVENT, - HCatConstants.HCAT_DROP_DATABASE_EVENT); - Assert.assertEquals(expectedMessages, actualMessages); - } - - @Test - public void testAMQListener() throws Exception { - driver.run("create database mydb"); - driver.run("use mydb"); - driver.run("create table mytbl (a string) partitioned by (b string)"); - driver.run("alter table mytbl add partition(b='2011')"); - Map kvs = new HashMap(1); - kvs.put("b", "2011"); - client.markPartitionForEvent("mydb", "mytbl", kvs, - PartitionEventType.LOAD_DONE); - driver.run("alter table mytbl drop partition(b='2011')"); - driver.run("drop table mytbl"); - driver.run("drop database mydb"); - } - - @Override - public void onMessage(Message msg) { - String event; - try { - event = 
msg.getStringProperty(HCatConstants.HCAT_EVENT); - String format = msg.getStringProperty(HCatConstants.HCAT_MESSAGE_FORMAT); - String version = msg.getStringProperty(HCatConstants.HCAT_MESSAGE_VERSION); - String messageBody = ((TextMessage)msg).getText(); - actualMessages.add(event); - MessageDeserializer deserializer = MessageFactory.getDeserializer(format, version); - - if (event.equals(HCatConstants.HCAT_CREATE_DATABASE_EVENT)) { - - Assert.assertEquals("topic://" + HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX, msg - .getJMSDestination().toString()); - CreateDatabaseMessage message = deserializer.getCreateDatabaseMessage(messageBody); - Assert.assertEquals("mydb", message.getDB()); - HCatEventMessage message2 = MessagingUtils.getMessage(msg); - Assert.assertTrue("Unexpected message-type.", message2 instanceof CreateDatabaseMessage); - Assert.assertEquals("mydb", message2.getDB()); - } else if (event.equals(HCatConstants.HCAT_CREATE_TABLE_EVENT)) { - - Assert.assertEquals("topic://hcat.mydb", msg.getJMSDestination().toString()); - CreateTableMessage message = deserializer.getCreateTableMessage(messageBody); - Assert.assertEquals("mytbl", message.getTable()); - Assert.assertEquals("mydb", message.getDB()); - HCatEventMessage message2 = MessagingUtils.getMessage(msg); - Assert.assertTrue("Unexpected message-type.", message2 instanceof CreateTableMessage); - Assert.assertEquals("mydb", message2.getDB()); - Assert.assertEquals("mytbl", ((CreateTableMessage) message2).getTable()); - } else if (event.equals(HCatConstants.HCAT_ADD_PARTITION_EVENT)) { - - Assert.assertEquals("topic://hcat.mydb.mytbl", msg.getJMSDestination() - .toString()); - AddPartitionMessage message = deserializer.getAddPartitionMessage(messageBody); - Assert.assertEquals("mytbl", message.getTable()); - Assert.assertEquals("mydb", message.getDB()); - Assert.assertEquals(1, message.getPartitions().size()); - Assert.assertEquals("2011", message.getPartitions().get(0).get("b")); - HCatEventMessage message2 = 
MessagingUtils.getMessage(msg); - Assert.assertTrue("Unexpected message-type.", message2 instanceof AddPartitionMessage); - Assert.assertEquals("mydb", message2.getDB()); - Assert.assertEquals("mytbl", ((AddPartitionMessage) message2).getTable()); - Assert.assertEquals(1, ((AddPartitionMessage) message2).getPartitions().size()); - Assert.assertEquals("2011", ((AddPartitionMessage) message2).getPartitions().get(0).get("b")); - } else if (event.equals(HCatConstants.HCAT_DROP_PARTITION_EVENT)) { - - Assert.assertEquals("topic://hcat.mydb.mytbl", msg.getJMSDestination() - .toString()); - DropPartitionMessage message = deserializer.getDropPartitionMessage(messageBody); - Assert.assertEquals("mytbl", message.getTable()); - Assert.assertEquals("mydb", message.getDB()); - Assert.assertEquals(1, message.getPartitions().size()); - Assert.assertEquals("2011", message.getPartitions().get(0).get("b")); - HCatEventMessage message2 = MessagingUtils.getMessage(msg); - Assert.assertTrue("Unexpected message-type.", message2 instanceof DropPartitionMessage); - Assert.assertEquals("mydb", message2.getDB()); - Assert.assertEquals("mytbl", ((DropPartitionMessage) message2).getTable()); - Assert.assertEquals(1, ((DropPartitionMessage) message2).getPartitions().size()); - Assert.assertEquals("2011", ((DropPartitionMessage) message2).getPartitions().get(0).get("b")); - } else if (event.equals(HCatConstants.HCAT_DROP_TABLE_EVENT)) { - - Assert.assertEquals("topic://hcat.mydb", msg.getJMSDestination().toString()); - DropTableMessage message = deserializer.getDropTableMessage(messageBody); - Assert.assertEquals("mytbl", message.getTable()); - Assert.assertEquals("mydb", message.getDB()); - HCatEventMessage message2 = MessagingUtils.getMessage(msg); - Assert.assertTrue("Unexpected message-type.", message2 instanceof DropTableMessage); - Assert.assertEquals("mydb", message2.getDB()); - Assert.assertEquals("mytbl", ((DropTableMessage) message2).getTable()); - } else if 
(event.equals(HCatConstants.HCAT_DROP_DATABASE_EVENT)) { - - Assert.assertEquals("topic://" + HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX, msg - .getJMSDestination().toString()); - DropDatabaseMessage message = deserializer.getDropDatabaseMessage(messageBody); - Assert.assertEquals("mydb", message.getDB()); - HCatEventMessage message2 = MessagingUtils.getMessage(msg); - Assert.assertTrue("Unexpected message-type.", message2 instanceof DropDatabaseMessage); - Assert.assertEquals("mydb", message2.getDB()); - } else if (event.equals(HCatConstants.HCAT_PARTITION_DONE_EVENT)) { - // TODO: Fill in when PARTITION_DONE_EVENT is supported. - Assert.assertTrue("Unexpected: HCAT_PARTITION_DONE_EVENT not supported (yet).", false); - } else { - Assert.assertTrue("Unexpected event-type: " + event, false); - } - - } catch (JMSException e) { - e.printStackTrace(System.err); - assert false; - } - } -} diff --git hcatalog/server-extensions/src/test/java/org/apache/hive/hcatalog/listener/TestMsgBusConnection.java hcatalog/server-extensions/src/test/java/org/apache/hive/hcatalog/listener/TestMsgBusConnection.java new file mode 100644 index 0000000..6354d27 --- /dev/null +++ hcatalog/server-extensions/src/test/java/org/apache/hive/hcatalog/listener/TestMsgBusConnection.java @@ -0,0 +1,123 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.listener; + +import javax.jms.Connection; +import javax.jms.ConnectionFactory; +import javax.jms.Destination; +import javax.jms.JMSException; +import javax.jms.Message; +import javax.jms.MessageConsumer; +import javax.jms.TextMessage; +import javax.jms.Session; + +import junit.framework.TestCase; + +import org.apache.activemq.ActiveMQConnectionFactory; +import org.apache.activemq.broker.BrokerService; +import org.apache.hadoop.hive.cli.CliSessionState; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import org.apache.hadoop.hive.metastore.api.AlreadyExistsException; +import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; +import org.apache.hadoop.hive.ql.Driver; +import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.hive.hcatalog.common.HCatConstants; +import org.apache.hive.hcatalog.messaging.HCatEventMessage; +import org.apache.hive.hcatalog.messaging.jms.MessagingUtils; + +public class TestMsgBusConnection extends TestCase { + + private Driver driver; + private BrokerService broker; + private MessageConsumer consumer; + + @Override + protected void setUp() throws Exception { + + super.setUp(); + broker = new BrokerService(); + // configure the broker + broker.addConnector("tcp://localhost:61616?broker.persistent=false"); + + broker.start(); + + System.setProperty("java.naming.factory.initial", + "org.apache.activemq.jndi.ActiveMQInitialContextFactory"); + System.setProperty("java.naming.provider.url", "tcp://localhost:61616"); + connectClient(); + HiveConf hiveConf = new HiveConf(this.getClass()); + hiveConf.set(ConfVars.METASTORE_EVENT_LISTENERS.varname, + NotificationListener.class.getName()); + hiveConf.set("hive.metastore.local", "true"); + hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); + 
hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); + hiveConf.set(HCatConstants.HCAT_MSGBUS_TOPIC_PREFIX, "planetlab.hcat"); + SessionState.start(new CliSessionState(hiveConf)); + driver = new Driver(hiveConf); + } + + private void connectClient() throws JMSException { + ConnectionFactory connFac = new ActiveMQConnectionFactory( + "tcp://localhost:61616"); + Connection conn = connFac.createConnection(); + conn.start(); + Session session = conn.createSession(true, Session.SESSION_TRANSACTED); + Destination hcatTopic = session.createTopic("planetlab.hcat"); + consumer = session.createConsumer(hcatTopic); + } + + public void testConnection() throws Exception { + + try { + driver.run("create database testconndb"); + Message msg = consumer.receive(); + assertTrue("Expected TextMessage", msg instanceof TextMessage); + assertEquals(HCatConstants.HCAT_CREATE_DATABASE_EVENT, + msg.getStringProperty(HCatConstants.HCAT_EVENT)); + assertEquals("topic://planetlab.hcat", msg.getJMSDestination().toString()); + HCatEventMessage messageObject = MessagingUtils.getMessage(msg); + assertEquals("testconndb", messageObject.getDB()); + broker.stop(); + driver.run("drop database testconndb cascade"); + broker.start(true); + connectClient(); + driver.run("create database testconndb"); + msg = consumer.receive(); + assertEquals(HCatConstants.HCAT_CREATE_DATABASE_EVENT, + msg.getStringProperty(HCatConstants.HCAT_EVENT)); + assertEquals("topic://planetlab.hcat", msg.getJMSDestination().toString()); + assertEquals("testconndb", messageObject.getDB()); + driver.run("drop database testconndb cascade"); + msg = consumer.receive(); + assertEquals(HCatConstants.HCAT_DROP_DATABASE_EVENT, + msg.getStringProperty(HCatConstants.HCAT_EVENT)); + assertEquals("topic://planetlab.hcat", msg.getJMSDestination().toString()); + assertEquals("testconndb", messageObject.getDB()); + } catch (NoSuchObjectException nsoe) { + 
nsoe.printStackTrace(System.err); + assert false; + } catch (AlreadyExistsException aee) { + aee.printStackTrace(System.err); + assert false; + } + } +} diff --git hcatalog/server-extensions/src/test/java/org/apache/hive/hcatalog/listener/TestNotificationListener.java hcatalog/server-extensions/src/test/java/org/apache/hive/hcatalog/listener/TestNotificationListener.java new file mode 100644 index 0000000..4d7c4da --- /dev/null +++ hcatalog/server-extensions/src/test/java/org/apache/hive/hcatalog/listener/TestNotificationListener.java @@ -0,0 +1,219 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hive.hcatalog.listener; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import javax.jms.Connection; +import javax.jms.ConnectionFactory; +import javax.jms.Destination; +import javax.jms.JMSException; +import javax.jms.TextMessage; +import javax.jms.Message; +import javax.jms.MessageConsumer; +import javax.jms.MessageListener; +import javax.jms.Session; + +import org.apache.activemq.ActiveMQConnectionFactory; +import org.apache.hadoop.hive.cli.CliSessionState; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; +import org.apache.hadoop.hive.metastore.api.PartitionEventType; +import org.apache.hadoop.hive.ql.Driver; +import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.hive.hcatalog.common.HCatConstants; +import org.apache.hive.hcatalog.mapreduce.HCatBaseTest; + +import org.apache.hive.hcatalog.messaging.AddPartitionMessage; +import org.apache.hive.hcatalog.messaging.CreateDatabaseMessage; +import org.apache.hive.hcatalog.messaging.CreateTableMessage; +import org.apache.hive.hcatalog.messaging.DropDatabaseMessage; +import org.apache.hive.hcatalog.messaging.DropPartitionMessage; +import org.apache.hive.hcatalog.messaging.DropTableMessage; +import org.apache.hive.hcatalog.messaging.HCatEventMessage; +import org.apache.hive.hcatalog.messaging.MessageDeserializer; +import org.apache.hive.hcatalog.messaging.MessageFactory; +import org.apache.hive.hcatalog.messaging.jms.MessagingUtils; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +public class TestNotificationListener extends HCatBaseTest implements MessageListener { + + private List actualMessages = new ArrayList(); + + @Before + public void setUp() throws Exception { + System.setProperty("java.naming.factory.initial", + 
"org.apache.activemq.jndi.ActiveMQInitialContextFactory"); + System.setProperty("java.naming.provider.url", + "vm://localhost?broker.persistent=false"); + ConnectionFactory connFac = new ActiveMQConnectionFactory( + "vm://localhost?broker.persistent=false"); + Connection conn = connFac.createConnection(); + conn.start(); + // We want message to be sent when session commits, thus we run in + // transacted mode. + Session session = conn.createSession(true, Session.SESSION_TRANSACTED); + Destination hcatTopic = session + .createTopic(HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX); + MessageConsumer consumer1 = session.createConsumer(hcatTopic); + consumer1.setMessageListener(this); + Destination tblTopic = session + .createTopic(HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX + ".mydb.mytbl"); + MessageConsumer consumer2 = session.createConsumer(tblTopic); + consumer2.setMessageListener(this); + Destination dbTopic = session + .createTopic(HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX + ".mydb"); + MessageConsumer consumer3 = session.createConsumer(dbTopic); + consumer3.setMessageListener(this); + + setUpHiveConf(); + hiveConf.set(ConfVars.METASTORE_EVENT_LISTENERS.varname, + NotificationListener.class.getName()); + SessionState.start(new CliSessionState(hiveConf)); + driver = new Driver(hiveConf); + client = new HiveMetaStoreClient(hiveConf); + } + + @After + public void tearDown() throws Exception { + List expectedMessages = Arrays.asList( + HCatConstants.HCAT_CREATE_DATABASE_EVENT, + HCatConstants.HCAT_CREATE_TABLE_EVENT, + HCatConstants.HCAT_ADD_PARTITION_EVENT, + HCatConstants.HCAT_DROP_PARTITION_EVENT, + HCatConstants.HCAT_DROP_TABLE_EVENT, + HCatConstants.HCAT_DROP_DATABASE_EVENT); + Assert.assertEquals(expectedMessages, actualMessages); + } + + @Test + public void testAMQListener() throws Exception { + driver.run("create database mydb"); + driver.run("use mydb"); + driver.run("create table mytbl (a string) partitioned by (b string)"); + driver.run("alter table mytbl add 
partition(b='2011')"); + Map kvs = new HashMap(1); + kvs.put("b", "2011"); + client.markPartitionForEvent("mydb", "mytbl", kvs, + PartitionEventType.LOAD_DONE); + driver.run("alter table mytbl drop partition(b='2011')"); + driver.run("drop table mytbl"); + driver.run("drop database mydb"); + } + + @Override + public void onMessage(Message msg) { + String event; + try { + event = msg.getStringProperty(HCatConstants.HCAT_EVENT); + String format = msg.getStringProperty(HCatConstants.HCAT_MESSAGE_FORMAT); + String version = msg.getStringProperty(HCatConstants.HCAT_MESSAGE_VERSION); + String messageBody = ((TextMessage)msg).getText(); + actualMessages.add(event); + MessageDeserializer deserializer = MessageFactory.getDeserializer(format, version); + + if (event.equals(HCatConstants.HCAT_CREATE_DATABASE_EVENT)) { + + Assert.assertEquals("topic://" + HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX, msg + .getJMSDestination().toString()); + CreateDatabaseMessage message = deserializer.getCreateDatabaseMessage(messageBody); + Assert.assertEquals("mydb", message.getDB()); + HCatEventMessage message2 = MessagingUtils.getMessage(msg); + Assert.assertTrue("Unexpected message-type.", message2 instanceof CreateDatabaseMessage); + Assert.assertEquals("mydb", message2.getDB()); + } else if (event.equals(HCatConstants.HCAT_CREATE_TABLE_EVENT)) { + + Assert.assertEquals("topic://hcat.mydb", msg.getJMSDestination().toString()); + CreateTableMessage message = deserializer.getCreateTableMessage(messageBody); + Assert.assertEquals("mytbl", message.getTable()); + Assert.assertEquals("mydb", message.getDB()); + HCatEventMessage message2 = MessagingUtils.getMessage(msg); + Assert.assertTrue("Unexpected message-type.", message2 instanceof CreateTableMessage); + Assert.assertEquals("mydb", message2.getDB()); + Assert.assertEquals("mytbl", ((CreateTableMessage) message2).getTable()); + } else if (event.equals(HCatConstants.HCAT_ADD_PARTITION_EVENT)) { + + Assert.assertEquals("topic://hcat.mydb.mytbl", 
msg.getJMSDestination() + .toString()); + AddPartitionMessage message = deserializer.getAddPartitionMessage(messageBody); + Assert.assertEquals("mytbl", message.getTable()); + Assert.assertEquals("mydb", message.getDB()); + Assert.assertEquals(1, message.getPartitions().size()); + Assert.assertEquals("2011", message.getPartitions().get(0).get("b")); + HCatEventMessage message2 = MessagingUtils.getMessage(msg); + Assert.assertTrue("Unexpected message-type.", message2 instanceof AddPartitionMessage); + Assert.assertEquals("mydb", message2.getDB()); + Assert.assertEquals("mytbl", ((AddPartitionMessage) message2).getTable()); + Assert.assertEquals(1, ((AddPartitionMessage) message2).getPartitions().size()); + Assert.assertEquals("2011", ((AddPartitionMessage) message2).getPartitions().get(0).get("b")); + } else if (event.equals(HCatConstants.HCAT_DROP_PARTITION_EVENT)) { + + Assert.assertEquals("topic://hcat.mydb.mytbl", msg.getJMSDestination() + .toString()); + DropPartitionMessage message = deserializer.getDropPartitionMessage(messageBody); + Assert.assertEquals("mytbl", message.getTable()); + Assert.assertEquals("mydb", message.getDB()); + Assert.assertEquals(1, message.getPartitions().size()); + Assert.assertEquals("2011", message.getPartitions().get(0).get("b")); + HCatEventMessage message2 = MessagingUtils.getMessage(msg); + Assert.assertTrue("Unexpected message-type.", message2 instanceof DropPartitionMessage); + Assert.assertEquals("mydb", message2.getDB()); + Assert.assertEquals("mytbl", ((DropPartitionMessage) message2).getTable()); + Assert.assertEquals(1, ((DropPartitionMessage) message2).getPartitions().size()); + Assert.assertEquals("2011", ((DropPartitionMessage) message2).getPartitions().get(0).get("b")); + } else if (event.equals(HCatConstants.HCAT_DROP_TABLE_EVENT)) { + + Assert.assertEquals("topic://hcat.mydb", msg.getJMSDestination().toString()); + DropTableMessage message = deserializer.getDropTableMessage(messageBody); + 
Assert.assertEquals("mytbl", message.getTable()); + Assert.assertEquals("mydb", message.getDB()); + HCatEventMessage message2 = MessagingUtils.getMessage(msg); + Assert.assertTrue("Unexpected message-type.", message2 instanceof DropTableMessage); + Assert.assertEquals("mydb", message2.getDB()); + Assert.assertEquals("mytbl", ((DropTableMessage) message2).getTable()); + } else if (event.equals(HCatConstants.HCAT_DROP_DATABASE_EVENT)) { + + Assert.assertEquals("topic://" + HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX, msg + .getJMSDestination().toString()); + DropDatabaseMessage message = deserializer.getDropDatabaseMessage(messageBody); + Assert.assertEquals("mydb", message.getDB()); + HCatEventMessage message2 = MessagingUtils.getMessage(msg); + Assert.assertTrue("Unexpected message-type.", message2 instanceof DropDatabaseMessage); + Assert.assertEquals("mydb", message2.getDB()); + } else if (event.equals(HCatConstants.HCAT_PARTITION_DONE_EVENT)) { + // TODO: Fill in when PARTITION_DONE_EVENT is supported. + Assert.assertTrue("Unexpected: HCAT_PARTITION_DONE_EVENT not supported (yet).", false); + } else { + Assert.assertTrue("Unexpected event-type: " + event, false); + } + + } catch (JMSException e) { + e.printStackTrace(System.err); + assert false; + } + } +} diff --git hcatalog/src/docs/src/documentation/content/xdocs/authorization.xml hcatalog/src/docs/src/documentation/content/xdocs/authorization.xml index 571900e..c44d5fe 100644 --- hcatalog/src/docs/src/documentation/content/xdocs/authorization.xml +++ hcatalog/src/docs/src/documentation/content/xdocs/authorization.xml @@ -185,7 +185,7 @@ <property> <name>hive.security.authorization.manager</name> - <value>org.apache.hcatalog.security.HdfsAuthorizationProvider</value> + <value>org.apache.hive.hcatalog.security.HdfsAuthorizationProvider</value> <description>the hive client authorization manager class name. 
The user defined authorization class should implement interface org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider. diff --git hcatalog/src/docs/src/documentation/content/xdocs/configuration.xml hcatalog/src/docs/src/documentation/content/xdocs/configuration.xml index 0739e72..b51d5a0 100644 --- hcatalog/src/docs/src/documentation/content/xdocs/configuration.xml +++ hcatalog/src/docs/src/documentation/content/xdocs/configuration.xml @@ -228,7 +228,7 @@ Using localhost in metastore uri does not work with kerberos security.
- + diff --git hcatalog/src/docs/src/documentation/content/xdocs/install.xml hcatalog/src/docs/src/documentation/content/xdocs/install.xml index bc97a3a..eae39d3 100644 --- hcatalog/src/docs/src/documentation/content/xdocs/install.xml +++ hcatalog/src/docs/src/documentation/content/xdocs/install.xml @@ -186,7 +186,7 @@ - + diff --git hcatalog/src/docs/src/documentation/content/xdocs/listproperties.xml hcatalog/src/docs/src/documentation/content/xdocs/listproperties.xml index 3b1cb4e..a653a95 100644 --- hcatalog/src/docs/src/documentation/content/xdocs/listproperties.xml +++ hcatalog/src/docs/src/documentation/content/xdocs/listproperties.xml @@ -86,10 +86,10 @@ "properties": { "fruit": "apple", "last_modified_by": "ctdean", - "hcat.osd": "org.apache.hcatalog.rcfile.RCFileOutputDriver", + "hcat.osd": "org.apache.hive.hcatalog.rcfile.RCFileOutputDriver", "color": "blue", "last_modified_time": "1331620706", - "hcat.isd": "org.apache.hcatalog.rcfile.RCFileInputDriver", + "hcat.isd": "org.apache.hive.hcatalog.rcfile.RCFileInputDriver", "transient_lastDdlTime": "1331620706", "comment": "Best table made today", "country": "Albania" diff --git hcatalog/src/docs/src/documentation/content/xdocs/listtables.xml hcatalog/src/docs/src/documentation/content/xdocs/listtables.xml index f988be1..7d47689 100644 --- hcatalog/src/docs/src/documentation/content/xdocs/listtables.xml +++ hcatalog/src/docs/src/documentation/content/xdocs/listtables.xml @@ -100,10 +100,10 @@ at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:1332) at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1123) at org.apache.hadoop.hive.ql.Driver.run(Driver.java:931) - at org.apache.hcatalog.cli.HCatDriver.run(HCatDriver.java:42) - at org.apache.hcatalog.cli.HCatCli.processCmd(HCatCli.java:247) - at org.apache.hcatalog.cli.HCatCli.processLine(HCatCli.java:203) - at org.apache.hcatalog.cli.HCatCli.main(HCatCli.java:162) + at org.apache.hive.hcatalog.cli.HCatDriver.run(HCatDriver.java:42) + at 
org.apache.hive.hcatalog.cli.HCatCli.processCmd(HCatCli.java:247) + at org.apache.hive.hcatalog.cli.HCatCli.processLine(HCatCli.java:203) + at org.apache.hive.hcatalog.cli.HCatCli.main(HCatCli.java:162) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25) diff --git hcatalog/src/docs/src/documentation/content/xdocs/loadstore.xml hcatalog/src/docs/src/documentation/content/xdocs/loadstore.xml index 3624ddb..fddb270 100644 --- hcatalog/src/docs/src/documentation/content/xdocs/loadstore.xml +++ hcatalog/src/docs/src/documentation/content/xdocs/loadstore.xml @@ -44,7 +44,7 @@ required for these interfaces.

HCatLoader is accessed via a Pig load statement.

-A = LOAD 'tablename' USING org.apache.hcatalog.pig.HCatLoader(); +A = LOAD 'tablename' USING org.apache.hive.hcatalog.pig.HCatLoader();

Assumptions

@@ -285,7 +285,7 @@ and to be able to authenticate to the HCatalog server.

This load statement will load all partitions of the specified table.

/* myscript.pig */ -A = LOAD 'tablename' USING org.apache.hcatalog.pig.HCatLoader(); +A = LOAD 'tablename' USING org.apache.hive.hcatalog.pig.HCatLoader(); ... ... @@ -296,7 +296,7 @@ might not immediately follow its load statement.) The filter statement can include conditions on partition as well as non-partition columns.

/* myscript.pig */ -A = LOAD 'tablename' USING org.apache.hcatalog.pig.HCatLoader(); +A = LOAD 'tablename' USING org.apache.hive.hcatalog.pig.HCatLoader(); -- date is a partition column; age is not B = filter A by date == '20100819' and age < 30; @@ -310,7 +310,7 @@ C = filter A by date == '20100819' and country == 'US';

To scan a whole table, for example:

-a = load 'student_data' using org.apache.hcatalog.pig.HCatLoader(); +a = load 'student_data' using org.apache.hive.hcatalog.pig.HCatLoader(); b = foreach a generate name, age; @@ -321,7 +321,7 @@ declare name and age as fields, as if you were loading from a file.

datestamp, for example:

-a = load 'web_logs' using org.apache.hcatalog.pig.HCatLoader(); +a = load 'web_logs' using org.apache.hive.hcatalog.pig.HCatLoader(); b = filter a by datestamp == '20110924'; @@ -329,7 +329,7 @@ b = filter a by datestamp == '20110924'; datestamp = '20110924'. You can combine this filter with others via 'and':

-a = load 'web_logs' using org.apache.hcatalog.pig.HCatLoader(); +a = load 'web_logs' using org.apache.hive.hcatalog.pig.HCatLoader(); b = filter a by datestamp == '20110924' and user is not null; @@ -345,26 +345,26 @@ and '>='.

For example:

-a = load 'web_logs' using org.apache.hcatalog.pig.HCatLoader(); +a = load 'web_logs' using org.apache.hive.hcatalog.pig.HCatLoader(); b = filter a by datestamp > '20110924';

A complex filter can have various combinations of operators, such as:

-a = load 'web_logs' using org.apache.hcatalog.pig.HCatLoader(); +a = load 'web_logs' using org.apache.hive.hcatalog.pig.HCatLoader(); b = filter a by datestamp == '20110924' or datestamp == '20110925';

These two examples have the same effect:

-a = load 'web_logs' using org.apache.hcatalog.pig.HCatLoader(); +a = load 'web_logs' using org.apache.hive.hcatalog.pig.HCatLoader(); b = filter a by datestamp >= '20110924' and datestamp <= '20110925'; -a = load 'web_logs' using org.apache.hcatalog.pig.HCatLoader(); +a = load 'web_logs' using org.apache.hive.hcatalog.pig.HCatLoader(); b = filter a by datestamp <= '20110925' and datestamp >= '20110924'; @@ -391,7 +391,7 @@ B = FOREACH A ... my_processed_data = ... STORE my_processed_data INTO 'tablename' - USING org.apache.hcatalog.pig.HCatStorer(); + USING org.apache.hive.hcatalog.pig.HCatStorer();

Assumptions

@@ -414,17 +414,17 @@ should NOT be quoted.

You can write to a non-partitioned table simply by using HCatStorer. The contents of the table will be overwritten:

-store z into 'web_data' using org.apache.hcatalog.pig.HCatStorer(); +store z into 'web_data' using org.apache.hive.hcatalog.pig.HCatStorer();

To add one new partition to a partitioned table, specify the partition value in the store function. Pay careful attention to the quoting, as the whole string must be single quoted and separated with an equals sign:

-store z into 'web_data' using org.apache.hcatalog.pig.HCatStorer('datestamp=20110924'); +store z into 'web_data' using org.apache.hive.hcatalog.pig.HCatStorer('datestamp=20110924');

To write into multiple partitions at once, make sure that the partition column is present in your data, then call HCatStorer with no argument:

-store z into 'web_data' using org.apache.hcatalog.pig.HCatStorer(); +store z into 'web_data' using org.apache.hive.hcatalog.pig.HCatStorer(); -- datestamp must be a field in the relation z diff --git hcatalog/src/packages/templates/conf/hive-site.xml.template hcatalog/src/packages/templates/conf/hive-site.xml.template index fc5794f..5f89f4a 100644 --- hcatalog/src/packages/templates/conf/hive-site.xml.template +++ hcatalog/src/packages/templates/conf/hive-site.xml.template @@ -87,7 +87,7 @@ hive.semantic.analyzer.factory.impl - org.apache.hcatalog.cli.HCatSemanticAnalyzerFactory + org.apache.hive.hcatalog.cli.HCatSemanticAnalyzerFactory controls which SemanticAnalyzerFactory implemenation class is used by CLI diff --git hcatalog/src/test/e2e/hcatalog/drivers/TestDriverHadoop.pm hcatalog/src/test/e2e/hcatalog/drivers/TestDriverHadoop.pm index 09c1683..512d12a 100644 --- hcatalog/src/test/e2e/hcatalog/drivers/TestDriverHadoop.pm +++ hcatalog/src/test/e2e/hcatalog/drivers/TestDriverHadoop.pm @@ -145,7 +145,7 @@ sub runTest $testCmd->{'group'} . "_" . $testCmd->{'num'} . ".$i.out"; $tableName = $results[$i]; $modifiedTestCmd{'num'} = $testCmd->{'num'} . "_" . $i . "_benchmark"; - $modifiedTestCmd{'pig'} = "a = load '$tableName' using org.apache.hcatalog.pig.HCatLoader(); store a into ':OUTPATH:';"; + $modifiedTestCmd{'pig'} = "a = load '$tableName' using org.apache.hive.hcatalog.pig.HCatLoader(); store a into ':OUTPATH:';"; my $r = $self->runPig(\%modifiedTestCmd, $log, 1); $outputs[$i] = $r->{'output'}; } else { @@ -185,7 +185,7 @@ sub dumpPigTable my $outfile = $testCmd->{'outpath'} . $testCmd->{'group'} . "_" . $testCmd->{'num'} . $id . 
"dump.out"; open(FH, "> $pigfile") or die "Unable to open file $pigfile to write pig script, $ERRNO\n"; - print FH "a = load '$table' using org.apache.hcatalog.pig.HCatLoader(); store a into '$outfile';\n"; + print FH "a = load '$table' using org.apache.hive.hcatalog.pig.HCatLoader(); store a into '$outfile';\n"; close(FH); diff --git hcatalog/src/test/e2e/hcatalog/drivers/TestDriverPig.pm hcatalog/src/test/e2e/hcatalog/drivers/TestDriverPig.pm index 7dca963..fa321ca 100644 --- hcatalog/src/test/e2e/hcatalog/drivers/TestDriverPig.pm +++ hcatalog/src/test/e2e/hcatalog/drivers/TestDriverPig.pm @@ -184,7 +184,7 @@ sub runTest $testCmd->{'group'} . "_" . $testCmd->{'num'} . ".$i.out"; $tableName = $results[$i]; $modifiedTestCmd{'num'} = $testCmd->{'num'} . "_" . $i . "_benchmark"; - $modifiedTestCmd{'pig'} = "a = load '$tableName' using org.apache.hcatalog.pig.HCatLoader(); store a into ':OUTPATH:';"; + $modifiedTestCmd{'pig'} = "a = load '$tableName' using org.apache.hive.hcatalog.pig.HCatLoader(); store a into ':OUTPATH:';"; my $r = $self->runPig(\%modifiedTestCmd, $log, 1, 1); $outputs[$i] = $r->{'output'}; } else { diff --git hcatalog/src/test/e2e/hcatalog/tests/hadoop.conf hcatalog/src/test/e2e/hcatalog/tests/hadoop.conf index 41e5eef..18020e2 100644 --- hcatalog/src/test/e2e/hcatalog/tests/hadoop.conf +++ hcatalog/src/test/e2e/hcatalog/tests/hadoop.conf @@ -40,7 +40,7 @@ $cfg = { { 'num' => 1 ,'hadoop' => q\ -jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.SimpleRead -libjars :HCAT_JAR: :THRIFTSERVER: studenttab10k :OUTPATH: +jar :FUNCPATH:/testudf.jar org.apache.hive.hcatalog.utils.SimpleRead -libjars :HCAT_JAR: :THRIFTSERVER: studenttab10k :OUTPATH: \, ,'sql' => q\select name, age from studenttab10k;\ ,'floatpostprocess' => 1 @@ -51,7 +51,7 @@ jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.SimpleRead -libjars :HCAT_J ,'hcat_prep'=>q\drop table if exists hadoop_checkin_2; create table hadoop_checkin_2 (name string, age int, gpa double) STORED AS 
TEXTFILE;\ ,'hadoop' => q\ -jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.ReadWrite -libjars :HCAT_JAR: :THRIFTSERVER: studenttab10k hadoop_checkin_2 +jar :FUNCPATH:/testudf.jar org.apache.hive.hcatalog.utils.ReadWrite -libjars :HCAT_JAR: :THRIFTSERVER: studenttab10k hadoop_checkin_2 \, ,'result_table' => 'hadoop_checkin_2' ,'sql' => q\select * from studenttab10k;\ @@ -63,7 +63,7 @@ jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.ReadWrite -libjars :HCAT_JA ,'hcat_prep'=>q\drop table if exists hadoop_checkin_3; create table hadoop_checkin_3 (age int, cnt int) STORED AS TEXTFILE;\ ,'hadoop' => q\ -jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.GroupByAge -libjars :HCAT_JAR: :THRIFTSERVER: studenttab10k hadoop_checkin_3 +jar :FUNCPATH:/testudf.jar org.apache.hive.hcatalog.utils.GroupByAge -libjars :HCAT_JAR: :THRIFTSERVER: studenttab10k hadoop_checkin_3 \, ,'result_table' => 'hadoop_checkin_3' ,'sql' => q\select age, count(*) from studenttab10k group by age;\ @@ -71,7 +71,7 @@ jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.GroupByAge -libjars :HCAT_J # Read from a partitioned table 'num' => 4 ,'hadoop' => q\ -jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.SimpleRead -libjars :HCAT_JAR: :THRIFTSERVER: studentparttab30k :OUTPATH: +jar :FUNCPATH:/testudf.jar org.apache.hive.hcatalog.utils.SimpleRead -libjars :HCAT_JAR: :THRIFTSERVER: studentparttab30k :OUTPATH: \, ,'sql' => q\select name, age from studentparttab30k;\ ,'floatpostprocess' => 1 @@ -82,7 +82,7 @@ jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.SimpleRead -libjars :HCAT_J ,'hcat_prep'=>q\drop table if exists hadoop_checkin_5; create table hadoop_checkin_5 (name string, age int) partitioned by (ds string) STORED AS TEXTFILE;\ ,'hadoop' => q? 
-jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.WriteTextPartitioned -libjars :HCAT_JAR: :THRIFTSERVER: studentparttab30k hadoop_checkin_5 ds=\"20110924\" +jar :FUNCPATH:/testudf.jar org.apache.hive.hcatalog.utils.WriteTextPartitioned -libjars :HCAT_JAR: :THRIFTSERVER: studentparttab30k hadoop_checkin_5 ds=\"20110924\" ?, ,'result_table' => 'hadoop_checkin_5' ,'sql' => q\select name, age, ds from studentparttab30k where ds='20110924';\ @@ -94,7 +94,7 @@ jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.WriteTextPartitioned -libja ,'hcat_prep'=>q\drop table if exists hadoop_checkin_6; create table hadoop_checkin_6 (name string, age int) partitioned by (ds string) STORED AS TEXTFILE;\ ,'hadoop' => q\ -jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.WriteTextPartitioned -libjars :HCAT_JAR: :THRIFTSERVER: studentparttab30k hadoop_checkin_6 +jar :FUNCPATH:/testudf.jar org.apache.hive.hcatalog.utils.WriteTextPartitioned -libjars :HCAT_JAR: :THRIFTSERVER: studentparttab30k hadoop_checkin_6 \, ,'result_table' => 'hadoop_checkin_6' ,'sql' => q\select name, age, ds from studentparttab30k;\ @@ -109,7 +109,7 @@ jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.WriteTextPartitioned -libja { 'num' => 1 ,'hadoop' => q\ -jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.ReadText -libjars :HCAT_JAR: :THRIFTSERVER: all100k :OUTPATH: +jar :FUNCPATH:/testudf.jar org.apache.hive.hcatalog.utils.ReadText -libjars :HCAT_JAR: :THRIFTSERVER: all100k :OUTPATH: \, ,'sql' => q\select * from all100k;\ ,'floatpostprocess' => 1 @@ -118,7 +118,7 @@ jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.ReadText -libjars :HCAT_JAR { 'num' => 2 ,'hadoop' => q\ -jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.ReadJson -libjars :HCAT_JAR: :THRIFTSERVER: all100kjson :OUTPATH: +jar :FUNCPATH:/testudf.jar org.apache.hive.hcatalog.utils.ReadJson -libjars :HCAT_JAR: :THRIFTSERVER: all100kjson :OUTPATH: \, ,'sql' => q\select s, i, d from all100kjson;\ ,'floatpostprocess' => 1 @@ 
-127,7 +127,7 @@ jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.ReadJson -libjars :HCAT_JAR { 'num' => 3 ,'hadoop' => q\ -jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.ReadRC -libjars :HCAT_JAR: :THRIFTSERVER: all100krc :OUTPATH: +jar :FUNCPATH:/testudf.jar org.apache.hive.hcatalog.utils.ReadRC -libjars :HCAT_JAR: :THRIFTSERVER: all100krc :OUTPATH: \, ,'sql' => q\select name, age, floor(gpa) + 0.1 from all100krc;\ ,'floatpostprocess' => 1 @@ -155,7 +155,7 @@ create table hadoop_write_1( fields terminated by ':' stored as textfile;\ ,'hadoop' => q\ -jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.WriteText -libjars :HCAT_JAR: :THRIFTSERVER: all100k hadoop_write_1 +jar :FUNCPATH:/testudf.jar org.apache.hive.hcatalog.utils.WriteText -libjars :HCAT_JAR: :THRIFTSERVER: all100k hadoop_write_1 \, ,'result_table' => 'hadoop_write_1' ,'sql' => q\select * from all100k;\ @@ -172,10 +172,10 @@ create table hadoop_write_2( d double, m map, bb array>) - row format serde 'org.apache.hcatalog.data.JsonSerDe' + row format serde 'org.apache.hive.hcatalog.data.JsonSerDe' stored as textfile;\ ,'hadoop' => q\ -jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.WriteJson -libjars :HCAT_JAR: :THRIFTSERVER: all100kjson hadoop_write_2 +jar :FUNCPATH:/testudf.jar org.apache.hive.hcatalog.utils.WriteJson -libjars :HCAT_JAR: :THRIFTSERVER: all100kjson hadoop_write_2 \, ,'result_table' => 'hadoop_write_2' ,'sql' => q\select s, i, d, '', '' from all100kjson;\ @@ -193,7 +193,7 @@ create table hadoop_write_3( stored as rcfile; \, ,'hadoop' => q\ -jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.WriteRC -libjars :HCAT_JAR: :THRIFTSERVER: all100krc hadoop_write_3 +jar :FUNCPATH:/testudf.jar org.apache.hive.hcatalog.utils.WriteRC -libjars :HCAT_JAR: :THRIFTSERVER: all100krc hadoop_write_3 \, ,'result_table' => 'hadoop_write_3' ,'sql' => q\select name, age, floor(gpa) + 0.1 from all100krc;\ @@ -210,7 +210,7 @@ create table hadoop_write_4( stored as sequencefile; \, ,'hadoop' 
=> q\ -jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.WriteRC -libjars :HCAT_JAR: :THRIFTSERVER: all100krc hadoop_write_4 +jar :FUNCPATH:/testudf.jar org.apache.hive.hcatalog.utils.WriteRC -libjars :HCAT_JAR: :THRIFTSERVER: all100krc hadoop_write_4 \, ,'result_table' => 'hadoop_write_4' ,'sql' => q\select name, age, floor(gpa) + 0.1 from all100krc;\ @@ -225,9 +225,9 @@ jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.WriteRC -libjars :HCAT_JAR: { 'num' => 1 ,'hcat_prep'=>q\drop table if exists hadoop_hbase_1; -create table hadoop_hbase_1(key string, gpa string) STORED BY 'org.apache.hcatalog.hbase.HBaseHCatStorageHandler' TBLPROPERTIES ('hbase.columns.mapping'=':key,info:gpa');\ +create table hadoop_hbase_1(key string, gpa string) STORED BY 'org.apache.hive.hcatalog.hbase.HBaseHCatStorageHandler' TBLPROPERTIES ('hbase.columns.mapping'=':key,info:gpa');\ ,'hadoop' => q\ -jar :FUNCPATH:/testudf.jar org.apache.hcatalog.utils.HBaseReadWrite -libjars :HCAT_JAR: :THRIFTSERVER: :INPATH:/studenttab10k hadoop_hbase_1 :OUTPATH: +jar :FUNCPATH:/testudf.jar org.apache.hive.hcatalog.utils.HBaseReadWrite -libjars :HCAT_JAR: :THRIFTSERVER: :INPATH:/studenttab10k hadoop_hbase_1 :OUTPATH: \, ,'sql' => q\select name, sum(gpa) from studenttab10k group by name;\ ,'floatpostprocess' => 1 diff --git hcatalog/src/test/e2e/hcatalog/tests/hive.conf hcatalog/src/test/e2e/hcatalog/tests/hive.conf index 2adee23..c532a9e 100644 --- hcatalog/src/test/e2e/hcatalog/tests/hive.conf +++ hcatalog/src/test/e2e/hcatalog/tests/hive.conf @@ -167,7 +167,7 @@ insert into TABLE hive_write_1 select t, si, i, b, s from all100k;\, 'num' => 2, 'sql' => q\ drop table if exists hive_write_2; -create table hive_write_2 (name string, age int, gpa double) row format serde 'org.apache.hcatalog.data.JsonSerDe' stored as textfile; +create table hive_write_2 (name string, age int, gpa double) row format serde 'org.apache.hive.hcatalog.data.JsonSerDe' stored as textfile; insert into TABLE hive_write_2 select 
s, i, 0.1 from all100kjson;\, 'result_table' => 'hive_write_2', 'verify_sql' =>"select s, i, 0.1 from all100kjson;", diff --git hcatalog/src/test/e2e/hcatalog/tests/pig.conf hcatalog/src/test/e2e/hcatalog/tests/pig.conf index 5b0ad9b..4c03fe4 100644 --- hcatalog/src/test/e2e/hcatalog/tests/pig.conf +++ hcatalog/src/test/e2e/hcatalog/tests/pig.conf @@ -42,8 +42,8 @@ $cfg = { 'num' => 1 ,'hcat_prep'=>q\drop table if exists pig_checkin_1; create table pig_checkin_1 (name string, age int, gpa double) STORED AS TEXTFILE;\ - ,'pig' => q\a = load 'studenttab10k' using org.apache.hcatalog.pig.HCatLoader(); -store a into 'pig_checkin_1' using org.apache.hcatalog.pig.HCatStorer();\, + ,'pig' => q\a = load 'studenttab10k' using org.apache.hive.hcatalog.pig.HCatLoader(); +store a into 'pig_checkin_1' using org.apache.hive.hcatalog.pig.HCatStorer();\, ,'result_table' => 'pig_checkin_1' ,'sql' => q\select * from studenttab10k;\ ,'floatpostprocess' => 1 @@ -51,8 +51,8 @@ store a into 'pig_checkin_1' using org.apache.hcatalog.pig.HCatStorer();\, }, { 'num' => 2 - ,'pig' => q\a = load 'studenttab10k' using org.apache.hcatalog.pig.HCatLoader(); -b = load 'votertab10k' using org.apache.hcatalog.pig.HCatLoader(); + ,'pig' => q\a = load 'studenttab10k' using org.apache.hive.hcatalog.pig.HCatLoader(); +b = load 'votertab10k' using org.apache.hive.hcatalog.pig.HCatLoader(); c = join a by name, b by name; store c into ':OUTPATH:';\, ,'sql' => [ 'select s.name, s.age, gpa, v.name, v.age, registration, contributions from studenttab10k s join votertab10k v on (s.name = v.name);'] @@ -61,7 +61,7 @@ store c into ':OUTPATH:';\, }, { 'num' => 3 - ,'pig' => q\a = load 'studenttab10k' using org.apache.hcatalog.pig.HCatLoader(); + ,'pig' => q\a = load 'studenttab10k' using org.apache.hive.hcatalog.pig.HCatLoader(); b = load ':INPATH:/votertab10k' as (name:chararray, age:int, registration:chararray, contributions:float); c = join a by name, b by name; store c into ':OUTPATH:';\ @@ -75,10 +75,10 @@ 
store c into ':OUTPATH:';\ drop table if exists pig_checkin_4_2; create table pig_checkin_4_1 (name string, age int, gpa double) STORED AS TEXTFILE; create table pig_checkin_4_2 (name string, age int, gpa double) STORED AS TEXTFILE;\ - ,'pig' => q\a = load 'studenttab10k' using org.apache.hcatalog.pig.HCatLoader(); + ,'pig' => q\a = load 'studenttab10k' using org.apache.hive.hcatalog.pig.HCatLoader(); split a into b if age <=40, c if age > 40; -store b into 'pig_checkin_4_1' using org.apache.hcatalog.pig.HCatStorer(); -store c into 'pig_checkin_4_2' using org.apache.hcatalog.pig.HCatStorer();\, +store b into 'pig_checkin_4_1' using org.apache.hive.hcatalog.pig.HCatStorer(); +store c into 'pig_checkin_4_2' using org.apache.hive.hcatalog.pig.HCatStorer();\, ,'result_table' => ['pig_checkin_4_1','pig_checkin_4_2'] ,'sql' => [ 'select * from studenttab10k where age<=40;', 'select * from studenttab10k where age>40;'] ,'floatpostprocess' => 1 @@ -88,9 +88,9 @@ store c into 'pig_checkin_4_2' using org.apache.hcatalog.pig.HCatStorer();\, 'num' => 5 ,'hcat_prep'=>q\drop table if exists pig_checkin_5; create table pig_checkin_5 (name string, age int, gpa double) STORED AS TEXTFILE;\ - ,'pig' => q\a = load 'studenttab10k' using org.apache.hcatalog.pig.HCatLoader(); + ,'pig' => q\a = load 'studenttab10k' using org.apache.hive.hcatalog.pig.HCatLoader(); split a into b if age <=40, c if age > 40; -store b into 'pig_checkin_5' using org.apache.hcatalog.pig.HCatStorer(); +store b into 'pig_checkin_5' using org.apache.hive.hcatalog.pig.HCatStorer(); store c into ':OUTPATH:';\, ,'result_table' => ['pig_checkin_5','?'] ,'sql' => [ 'select * from studenttab10k where age<=40;', 'select * from studenttab10k where age>40;'] @@ -101,10 +101,10 @@ store c into ':OUTPATH:';\, 'num' => 6 ,'hcat_prep'=>q\drop table if exists pig_checkin_6; create table pig_checkin_6 (name string, age int) partitioned by (ds string) STORED AS TEXTFILE;\ - ,'pig' => q\a = load 'studentparttab30k' using 
org.apache.hcatalog.pig.HCatLoader(); + ,'pig' => q\a = load 'studentparttab30k' using org.apache.hive.hcatalog.pig.HCatLoader(); b = filter a by ds == '20110924'; c = foreach b generate name, age; -store c into 'pig_checkin_6' using org.apache.hcatalog.pig.HCatStorer('ds=20110924');\, +store c into 'pig_checkin_6' using org.apache.hive.hcatalog.pig.HCatStorer('ds=20110924');\, #dump a;\, ,'result_table' => 'pig_checkin_6', ,'sql' => "select name, age, ds from studentparttab30k where ds='20110924';", @@ -115,9 +115,9 @@ store c into 'pig_checkin_6' using org.apache.hcatalog.pig.HCatStorer('ds=201109 'num' => 7 ,'hcat_prep'=>q\drop table if exists pig_checkin_7; create table pig_checkin_7 (name string, age int) partitioned by (ds string) STORED AS TEXTFILE;\ - ,'pig' => q\a = load 'studentparttab30k' using org.apache.hcatalog.pig.HCatLoader(); + ,'pig' => q\a = load 'studentparttab30k' using org.apache.hive.hcatalog.pig.HCatLoader(); b = foreach a generate name, age, ds; -store b into 'pig_checkin_7' using org.apache.hcatalog.pig.HCatStorer();\, +store b into 'pig_checkin_7' using org.apache.hive.hcatalog.pig.HCatStorer();\, ,'result_table' => 'pig_checkin_7', ,'sql' => "select name, age, ds from studentparttab30k;", ,'floatpostprocess' => 1 @@ -133,7 +133,7 @@ store b into 'pig_checkin_7' using org.apache.hcatalog.pig.HCatStorer();\, { 'ignore' => 1, # Need to checkin HCATALOG-168. 
'num' => 1 - ,'pig' => q\a = load 'all100k' using org.apache.hcatalog.pig.HCatLoader(); + ,'pig' => q\a = load 'all100k' using org.apache.hive.hcatalog.pig.HCatLoader(); store a into ':OUTPATH:';\, ,'sql' => q\select * from all100k;\ ,'floatpostprocess' => 1 @@ -141,7 +141,7 @@ store a into ':OUTPATH:';\, }, { 'num' => 2 - ,'pig' => q\a = load 'all100kjson' using org.apache.hcatalog.pig.HCatLoader(); + ,'pig' => q\a = load 'all100kjson' using org.apache.hive.hcatalog.pig.HCatLoader(); b = foreach a generate s, i, d; store b into ':OUTPATH:';\, ,'sql' => q\select s, i, d from all100kjson;\ @@ -150,7 +150,7 @@ store b into ':OUTPATH:';\, }, { 'num' => 3 - ,'pig' => q\a = load 'all100krc' using org.apache.hcatalog.pig.HCatLoader(); + ,'pig' => q\a = load 'all100krc' using org.apache.hive.hcatalog.pig.HCatLoader(); b = foreach a generate name, age; store b into ':OUTPATH:';\, ,'sql' => q\select name, age from all100krc;\ @@ -165,7 +165,7 @@ create external table pig_read_4 (name string, age int, gpa double) partitioned alter table pig_read_4 add partition (b='1') location '/user/hcat/tests/data/studenttab10k'; alter table pig_read_4 set fileformat rcfile; alter table pig_read_4 add partition (b='2') location '/user/hcat/tests/data/all100krc';? - ,'pig' => q\a = load 'pig_read_4' using org.apache.hcatalog.pig.HCatLoader(); + ,'pig' => q\a = load 'pig_read_4' using org.apache.hive.hcatalog.pig.HCatLoader(); b = foreach a generate name, age, b; store b into ':OUTPATH:';\, ,'sql' => q\(select name, age, 1 from studenttab10k) @@ -180,7 +180,7 @@ drop table if exists pig_db_1.pig_read_5; create external table pig_db_1.pig_read_5 (name string, age int, gpa double) partitioned by (b string) row format delimited fields terminated by '\t' stored as textfile; use pig_db_1; alter table pig_read_5 add partition (b='1') location '/user/hcat/tests/data/studenttab10k';? 
- ,'pig' => q\a = load 'pig_db_1.pig_read_5' using org.apache.hcatalog.pig.HCatLoader(); + ,'pig' => q\a = load 'pig_db_1.pig_read_5' using org.apache.hive.hcatalog.pig.HCatLoader(); b = foreach a generate name, age, b; store b into ':OUTPATH:';\, ,'sql' => q\select name, age, 1 from studenttab10k;\ @@ -196,7 +196,7 @@ store b into ':OUTPATH:';\, ,'hcat_prep'=>q\drop table if exists pig_write_1; create table pig_write_1(t tinyint,si smallint,i int,b bigint,bool boolean,f float,d double,s string) stored as rcfile;\ ,'pig' => q\a = load ':INPATH:/all100k' using PigStorage(':') as (t:int,si:int,i:int,b:int,bo:boolean,f:float,d:double,s:chararray); -store a into 'pig_write_1' using org.apache.hcatalog.pig.HCatStorer();\, +store a into 'pig_write_1' using org.apache.hive.hcatalog.pig.HCatStorer();\, ,'result_table' => 'pig_write_1' ,'sql' => q\select * from all100k;\ ,'floatpostprocess' => 1 @@ -211,12 +211,12 @@ create table pig_write_2( d double, m map, bb array>) - row format serde 'org.apache.hcatalog.data.JsonSerDe' + row format serde 'org.apache.hive.hcatalog.data.JsonSerDe' STORED AS TEXTFILE; \ - ,'pig' => q\a = load 'all100kjson' using org.apache.hcatalog.pig.HCatLoader(); + ,'pig' => q\a = load 'all100kjson' using org.apache.hive.hcatalog.pig.HCatLoader(); b = foreach a generate s, i, d; -store b into 'pig_write_2' using org.apache.hcatalog.pig.HCatStorer();\, +store b into 'pig_write_2' using org.apache.hive.hcatalog.pig.HCatStorer();\, ,'sql' => q\select IFNULL(s, ""), IFNULL(i, ""), IFNULL(d, "") from all100kjson;\ ,'result_table' => 'pig_write_2' ,'floatpostprocess' => 1 @@ -231,9 +231,9 @@ create table pig_write_3( gpa double) stored as rcfile; \ - ,'pig' => q\a = load 'all100krc' using org.apache.hcatalog.pig.HCatLoader(); + ,'pig' => q\a = load 'all100krc' using org.apache.hive.hcatalog.pig.HCatLoader(); b = foreach a generate name, age; -store b into 'pig_write_3' using org.apache.hcatalog.pig.HCatStorer();\, +store b into 'pig_write_3' using 
org.apache.hive.hcatalog.pig.HCatStorer();\, ,'sql' => q\select name, age from all100krc;\ ,'result_table' => 'pig_write_3' ,'floatpostprocess' => 1 @@ -249,10 +249,10 @@ create table pig_write_4( gpa double) stored as sequencefile; \ - ,'pig' => q\a = load 'studenttab10k' using org.apache.hcatalog.pig.HCatLoader(); + ,'pig' => q\a = load 'studenttab10k' using org.apache.hive.hcatalog.pig.HCatLoader(); b = foreach a generate name, age, 0.1; c = foreach b generate name, age, $2 as gpa; -store c into 'pig_write_4' using org.apache.hcatalog.pig.HCatStorer();\, +store c into 'pig_write_4' using org.apache.hive.hcatalog.pig.HCatStorer();\, ,'sql' => q\select name, age, 0.1 from studenttab10k;\ ,'result_table' => 'pig_write_4' ,'floatpostprocess' => 1 @@ -263,9 +263,9 @@ store c into 'pig_write_4' using org.apache.hcatalog.pig.HCatStorer();\, 'num' => 5 ,'hcat_prep'=>q?create database if not exists pig_db_1; create table if not exists pig_db_1.pig_write_5 (name string, age int) row format delimited fields terminated by '\t' stored as textfile;? 
- ,'pig' => q\a = load 'studenttab10k' using org.apache.hcatalog.pig.HCatLoader(); + ,'pig' => q\a = load 'studenttab10k' using org.apache.hive.hcatalog.pig.HCatLoader(); b = foreach a generate name, age; -store b into 'pig_db_1.pig_write_5' using org.apache.hcatalog.pig.HCatStorer();\, +store b into 'pig_db_1.pig_write_5' using org.apache.hive.hcatalog.pig.HCatStorer();\, ,'sql' => q\select name, age from studenttab10k;\ ,'result_table' => 'pig_db_1.pig_write_5' } @@ -279,10 +279,10 @@ store b into 'pig_db_1.pig_write_5' using org.apache.hcatalog.pig.HCatStorer();\ 'num' => 1 ,'hcat_prep'=>q\drop table if exists pig_change_schema_1; create table pig_change_schema_1 (name string) partitioned by (ds string) STORED AS TEXTFILE;\ - ,'pig' => q\a = load 'studentparttab30k' using org.apache.hcatalog.pig.HCatLoader(); + ,'pig' => q\a = load 'studentparttab30k' using org.apache.hive.hcatalog.pig.HCatLoader(); b = filter a by ds == '20110924'; c = foreach b generate name; -store c into 'pig_change_schema_1' using org.apache.hcatalog.pig.HCatStorer('ds=20110924');\, +store c into 'pig_change_schema_1' using org.apache.hive.hcatalog.pig.HCatStorer('ds=20110924');\, ,'result_table' => 'pig_change_schema_1' ,'sql' => q\select name, ds from studentparttab30k where ds='20110924';\ }, @@ -291,10 +291,10 @@ store c into 'pig_change_schema_1' using org.apache.hcatalog.pig.HCatStorer('ds= 'num' => 2 ,'depends_on' => 'Pig_Change_Schema_1' ,'hcat_prep'=>q\alter table pig_change_schema_1 add columns (age int);\ - ,'pig' => q\a = load 'studentparttab30k' using org.apache.hcatalog.pig.HCatLoader(); + ,'pig' => q\a = load 'studentparttab30k' using org.apache.hive.hcatalog.pig.HCatLoader(); b = filter a by ds == '20110925'; c = foreach b generate name, age; -store c into 'pig_change_schema_1' using org.apache.hcatalog.pig.HCatStorer('ds=20110925');\, +store c into 'pig_change_schema_1' using org.apache.hive.hcatalog.pig.HCatStorer('ds=20110925');\, ,'result_table' => 'pig_change_schema_1' 
,'sql' => q\(select name, '', ds from studentparttab30k where ds='20110924') union all @@ -304,7 +304,7 @@ store c into 'pig_change_schema_1' using org.apache.hcatalog.pig.HCatStorer('ds= # I don't like this, I'm using one test to setup for the next. But I don't know how else to do this. 'num' => 3 , 'depends_on' => 'Pig_Change_Schema_2' - ,'pig' => q\a = load 'pig_change_schema_1' using org.apache.hcatalog.pig.HCatLoader(); + ,'pig' => q\a = load 'pig_change_schema_1' using org.apache.hive.hcatalog.pig.HCatLoader(); c = foreach a generate name, age, ds; store c into ':OUTPATH:';\ ,'sql' => q\(select name, '', ds from studentparttab30k where ds='20110924') @@ -319,15 +319,15 @@ store c into ':OUTPATH:';\ { 'num' => 1 ,'hcat_prep'=>q\drop table if exists pig_hbase_1; -create table pig_hbase_1(key string, age string, gpa string) STORED BY 'org.apache.hcatalog.hbase.HBaseHCatStorageHandler' TBLPROPERTIES ('hbase.columns.mapping'=':key,info:age,info:gpa');\ +create table pig_hbase_1(key string, age string, gpa string) STORED BY 'org.apache.hive.hcatalog.hbase.HBaseHCatStorageHandler' TBLPROPERTIES ('hbase.columns.mapping'=':key,info:age,info:gpa');\ ,'pig' => q\set hcat.hbase.output.bulkMode 'false' a = load ':INPATH:/studenttab10k' as (name:chararray, age:int, gpa:float); b = group a by name; c = foreach b generate group as name, AVG(a.age) as age, AVG(a.gpa) as gpa; d = foreach c generate name as key, (chararray)age, (chararray)gpa as gpa; -store d into 'pig_hbase_1' using org.apache.hcatalog.pig.HCatStorer(); +store d into 'pig_hbase_1' using org.apache.hive.hcatalog.pig.HCatStorer(); exec -e = load 'pig_hbase_1' using org.apache.hcatalog.pig.HCatLoader(); +e = load 'pig_hbase_1' using org.apache.hive.hcatalog.pig.HCatLoader(); store e into ':OUTPATH:';\, ,'result_table' => ['pig_hbase_1','?'] ,'sql' => [ 'select name, avg(cast(age as decimal(10,5))), avg(gpa) from studenttab10k group by name;', 'select name, avg(cast(age as decimal(10,5))), avg(gpa) from 
studenttab10k group by name;' ] @@ -338,17 +338,17 @@ store e into ':OUTPATH:';\, # multiquery 'num' => 2 ,'hcat_prep'=>q\drop table if exists pig_hbase_2_1; -create table pig_hbase_2_1(key string, age string, gpa string) STORED BY 'org.apache.hcatalog.hbase.HBaseHCatStorageHandler' TBLPROPERTIES ('hbase.columns.mapping'=':key,info:age,info:gpa'); +create table pig_hbase_2_1(key string, age string, gpa string) STORED BY 'org.apache.hive.hcatalog.hbase.HBaseHCatStorageHandler' TBLPROPERTIES ('hbase.columns.mapping'=':key,info:age,info:gpa'); drop table if exists pig_hbase_2_2; -create table pig_hbase_2_2(key string, age string, gpa string) STORED BY 'org.apache.hcatalog.hbase.HBaseHCatStorageHandler' TBLPROPERTIES ('hbase.columns.mapping'=':key,info:age,info:gpa'); +create table pig_hbase_2_2(key string, age string, gpa string) STORED BY 'org.apache.hive.hcatalog.hbase.HBaseHCatStorageHandler' TBLPROPERTIES ('hbase.columns.mapping'=':key,info:age,info:gpa'); \ ,'pig' => q\set hcat.hbase.output.bulkMode 'false' a = load ':INPATH:/studenttab10k' as (name:chararray, age:int, gpa:float); b = group a by name; c = foreach b generate group as name, AVG(a.age) as age, AVG(a.gpa) as gpa; d = foreach c generate name as key, (chararray)age, (chararray)gpa as gpa; -store d into 'pig_hbase_2_1' using org.apache.hcatalog.pig.HCatStorer(); -store d into 'pig_hbase_2_2' using org.apache.hcatalog.pig.HCatStorer();\, +store d into 'pig_hbase_2_1' using org.apache.hive.hcatalog.pig.HCatStorer(); +store d into 'pig_hbase_2_2' using org.apache.hive.hcatalog.pig.HCatStorer();\, ,'result_table' => ['pig_hbase_2_1','pig_hbase_2_2'] ,'sql' => [ 'select name, avg(cast(age as decimal(10,5))), avg(gpa) from studenttab10k group by name;', 'select name, avg(cast(age as decimal(10,5))), avg(gpa) from studenttab10k group by name;'] ,'floatpostprocess' => 1 @@ -365,7 +365,7 @@ store d into 'pig_hbase_2_2' using org.apache.hcatalog.pig.HCatStorer();\, create table pig_hcat_barrier_1 (name string, 
age int, gpa double) partitioned by (b string) CLUSTERED BY (name) INTO 1 BUCKETS STORED AS TEXTFILE;\ ,'pig' => q\ a = load ':INPATH:/studenttab10k' as (name:chararray, age:int, gpa:double); -store a into 'pig_hcat_barrier_1' using org.apache.hcatalog.pig.HCatStorer('b=1'); \, +store a into 'pig_hcat_barrier_1' using org.apache.hive.hcatalog.pig.HCatStorer('b=1'); \, ,'expected_err_regex' => 'not supported' }, { @@ -374,7 +374,7 @@ store a into 'pig_hcat_barrier_1' using org.apache.hcatalog.pig.HCatStorer('b=1' create table pig_hcat_barrier_2 (name string, age int, gpa double) partitioned by (b string) CLUSTERED BY (name) SORTED BY (name) INTO 1 BUCKETS STORED AS TEXTFILE;\ ,'pig' => q\ a = load ':INPATH:/studenttab10k' as (name:chararray, age:int, gpa:double); -store a into 'pig_hcat_barrier_2' using org.apache.hcatalog.pig.HCatStorer('b=1'); \, +store a into 'pig_hcat_barrier_2' using org.apache.hive.hcatalog.pig.HCatStorer('b=1'); \, ,'expected_err_regex' => 'not supported' }, ], @@ -396,7 +396,7 @@ alter table pig_hcat_coop_1 partition(b='2') set fileformat TEXTFILE; alter table pig_hcat_coop_1 replace columns (name string, age int); : ,'pig' => q\ -a = load 'pig_hcat_coop_1' using org.apache.hcatalog.pig.HCatLoader(); +a = load 'pig_hcat_coop_1' using org.apache.hive.hcatalog.pig.HCatLoader(); store a into ':OUTPATH:';\, ,'sql' => q\select name, age, '1' from studenttab10k union all select name, age, '2' from votertab10k;\ ,'floatpostprocess' => 1 @@ -414,7 +414,7 @@ alter table pig_hcat_coop_2 partition(b='2') set fileformat RCFILE; alter table pig_hcat_coop_2 replace columns (age int, name string); : ,'pig' => q\ -a = load 'pig_hcat_coop_2' using org.apache.hcatalog.pig.HCatLoader(); +a = load 'pig_hcat_coop_2' using org.apache.hive.hcatalog.pig.HCatLoader(); store a into ':OUTPATH:';\, ,'sql' => q\select age, name, '1' from studenttab10k union all select age, name, '2' from all100krc;\ ,'floatpostprocess' => 1 @@ -428,7 +428,7 @@ store a into 
':OUTPATH:';\, # test reading tuples from the complex table 'num' => 1 ,'pig' => q\ -a = load 'studentcomplextab10k' using org.apache.hcatalog.pig.HCatLoader(); +a = load 'studentcomplextab10k' using org.apache.hive.hcatalog.pig.HCatLoader(); b = foreach a generate flatten(s); store b into ':OUTPATH:';\, ,'sql' => q\select IFNULL(name, ""), IFNULL(age, ""), IFNULL(gpa, "") from studentcomplextab10k;\ @@ -438,7 +438,7 @@ store b into ':OUTPATH:';\, # test reading maps from the complex table 'num' => 2 ,'pig' => q\ -a = load 'studentcomplextab10k' using org.apache.hcatalog.pig.HCatLoader(); +a = load 'studentcomplextab10k' using org.apache.hive.hcatalog.pig.HCatLoader(); b = foreach a generate s.name as n1, m#'name' as n2; c = filter b by n1 != '' and n2 is not null; store c into ':OUTPATH:';\, @@ -451,7 +451,7 @@ store c into ':OUTPATH:';\, # test reading arrays from the complex table 'num' => 3 ,'pig' => q\ -a = load 'studentcomplextab10k' using org.apache.hcatalog.pig.HCatLoader(); +a = load 'studentcomplextab10k' using org.apache.hive.hcatalog.pig.HCatLoader(); b = foreach a generate s.name as n1, flatten(a); c = filter b by n1 != '' ; store c into ':OUTPATH:';\, @@ -467,11 +467,11 @@ store c into ':OUTPATH:';\, ,'hcat_prep'=>q\drop table if exists pig_complex_4; create table pig_complex_4 (s struct) STORED AS TEXTFILE;\ ,'pig' => q\ -a = load 'studenttab10k' using org.apache.hcatalog.pig.HCatLoader(); +a = load 'studenttab10k' using org.apache.hive.hcatalog.pig.HCatLoader(); b = foreach a generate TOTUPLE(name, age, gpa) as s; -store b into 'pig_complex_4' using org.apache.hcatalog.pig.HCatStorer(); +store b into 'pig_complex_4' using org.apache.hive.hcatalog.pig.HCatStorer(); exec; -c = load 'pig_complex_4' using org.apache.hcatalog.pig.HCatLoader(); +c = load 'pig_complex_4' using org.apache.hive.hcatalog.pig.HCatLoader(); d = foreach c generate flatten(s); store d into ':OUTPATH:';\ ,'sql' => q\select name, age, gpa from studenttab10k;\ @@ -484,11 +484,11 @@ 
store d into ':OUTPATH:';\ ,'hcat_prep'=>q\drop table if exists pig_complex_5; create table pig_complex_5 (m map) STORED AS TEXTFILE;\ ,'pig' => q\ -a = load 'studenttab10k' using org.apache.hcatalog.pig.HCatLoader(); +a = load 'studenttab10k' using org.apache.hive.hcatalog.pig.HCatLoader(); b = foreach a generate TOMAP('name', name, 'age', (chararray)age, 'gpa', (chararray)gpa) as m; -store b into 'pig_complex_5' using org.apache.hcatalog.pig.HCatStorer(); +store b into 'pig_complex_5' using org.apache.hive.hcatalog.pig.HCatStorer(); exec; -c = load 'pig_complex_5' using org.apache.hcatalog.pig.HCatLoader(); +c = load 'pig_complex_5' using org.apache.hive.hcatalog.pig.HCatLoader(); d = foreach c generate m#'name', m#'age', m#'gpa'; store d into ':OUTPATH:';\ ,'sql' => q\select name, age, gpa from studenttab10k;\ @@ -501,14 +501,14 @@ store d into ':OUTPATH:';\ ,'hcat_prep'=>q\drop table if exists pig_complex_6; create table pig_complex_6 (a array) STORED AS TEXTFILE;\ ,'pig' => q\ -a = load 'studenttab10k' using org.apache.hcatalog.pig.HCatLoader(); +a = load 'studenttab10k' using org.apache.hive.hcatalog.pig.HCatLoader(); b = foreach a generate name; c = distinct b; d = group c all; e = foreach d generate $1 as a; -store e into 'pig_complex_6' using org.apache.hcatalog.pig.HCatStorer(); +store e into 'pig_complex_6' using org.apache.hive.hcatalog.pig.HCatStorer(); exec; -f = load 'pig_complex_6' using org.apache.hcatalog.pig.HCatLoader(); +f = load 'pig_complex_6' using org.apache.hive.hcatalog.pig.HCatLoader(); g = foreach f generate flatten(a); store g into ':OUTPATH:';\ ,'sql' => q\select distinct name from studenttab10k;\ diff --git hcatalog/src/test/e2e/hcatalog/tools/generate/generate_data.pl hcatalog/src/test/e2e/hcatalog/tools/generate/generate_data.pl index e246340..09645b8 100644 --- hcatalog/src/test/e2e/hcatalog/tools/generate/generate_data.pl +++ hcatalog/src/test/e2e/hcatalog/tools/generate/generate_data.pl @@ -319,7 +319,7 @@ stored as rcfile 
location '$location';\n"; } elsif ($format eq "json") { print $hivefp " -row format serde 'org.apache.hcatalog.data.JsonSerDe' +row format serde 'org.apache.hive.hcatalog.data.JsonSerDe' stored as textfile location '$location' ;\n"; @@ -657,7 +657,7 @@ for (my $i = 0; $i < $numRows; $i++) { d double, m map, bb array>) - row format serde 'org.apache.hcatalog.data.JsonSerDe' + row format serde 'org.apache.hive.hcatalog.data.JsonSerDe' STORED AS TEXTFILE location '$hdfsTargetDir/$tableName';\n"; open(PLAIN, ">$tableName.plain") or diff --git hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/DataReaderMaster.java hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/DataReaderMaster.java deleted file mode 100644 index a29a0f1..0000000 --- hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/DataReaderMaster.java +++ /dev/null @@ -1,71 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.utils; - -import java.io.File; -import java.io.FileNotFoundException; -import java.io.FileOutputStream; -import java.io.FileReader; -import java.io.IOException; -import java.io.ObjectOutputStream; -import java.util.HashMap; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Properties; - -import org.apache.hcatalog.common.HCatException; -import org.apache.hcatalog.data.transfer.DataTransferFactory; -import org.apache.hcatalog.data.transfer.HCatReader; -import org.apache.hcatalog.data.transfer.ReadEntity; -import org.apache.hcatalog.data.transfer.ReaderContext; - -public class DataReaderMaster { - - public static void main(String[] args) throws FileNotFoundException, IOException { - - // This config contains all the configuration that master node wants to provide - // to the HCatalog. - Properties externalConfigs = new Properties(); - externalConfigs.load(new FileReader(args[0])); - Map config = new HashMap(); - - for (Entry kv : externalConfigs.entrySet()) { - config.put((String) kv.getKey(), (String) kv.getValue()); - } - - // This piece of code runs in master node and gets necessary context. - ReaderContext context = runsInMaster(config); - - ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(new File(args[1]))); - oos.writeObject(context); - oos.flush(); - oos.close(); - // Master node will serialize readercontext and will make it available at slaves. 
- } - - private static ReaderContext runsInMaster(Map config) throws HCatException { - - ReadEntity.Builder builder = new ReadEntity.Builder(); - ReadEntity entity = builder.withTable(config.get("table")).build(); - HCatReader reader = DataTransferFactory.getHCatReader(entity, config); - ReaderContext cntxt = reader.prepareRead(); - return cntxt; - } -} diff --git hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/DataReaderSlave.java hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/DataReaderSlave.java deleted file mode 100644 index 6145482..0000000 --- hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/DataReaderSlave.java +++ /dev/null @@ -1,62 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.hcatalog.utils; - -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileWriter; -import java.io.IOException; -import java.io.ObjectInputStream; -import java.util.Iterator; -import java.util.List; - -import org.apache.hadoop.mapreduce.InputSplit; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.hcatalog.data.transfer.DataTransferFactory; -import org.apache.hcatalog.data.transfer.HCatReader; -import org.apache.hcatalog.data.transfer.ReaderContext; - -public class DataReaderSlave { - - public static void main(String[] args) throws IOException, ClassNotFoundException { - - ObjectInputStream ois = new ObjectInputStream(new FileInputStream(new File(args[0]))); - ReaderContext cntxt = (ReaderContext) ois.readObject(); - ois.close(); - - String[] inpSlitsToRead = args[1].split(","); - List splits = cntxt.getSplits(); - - for (int i = 0; i < inpSlitsToRead.length; i++) { - InputSplit split = splits.get(Integer.parseInt(inpSlitsToRead[i])); - HCatReader reader = DataTransferFactory.getHCatReader(split, cntxt.getConf()); - Iterator itr = reader.read(); - File f = new File(args[2] + "-" + i); - f.delete(); - BufferedWriter outFile = new BufferedWriter(new FileWriter(f)); - while (itr.hasNext()) { - String rec = itr.next().toString().replaceFirst("\\s+$", ""); - System.err.println(rec); - outFile.write(rec + "\n"); - } - outFile.close(); - } - } -} diff --git hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/DataWriterMaster.java hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/DataWriterMaster.java deleted file mode 100644 index 51125a7..0000000 --- hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/DataWriterMaster.java +++ /dev/null @@ -1,96 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.utils; - -import java.io.File; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.FileOutputStream; -import java.io.FileReader; -import java.io.IOException; -import java.io.ObjectInputStream; -import java.io.ObjectOutputStream; -import java.util.HashMap; -import java.util.Map; -import java.util.Properties; -import java.util.Map.Entry; - -import org.apache.hcatalog.common.HCatException; -import org.apache.hcatalog.data.transfer.DataTransferFactory; -import org.apache.hcatalog.data.transfer.HCatWriter; -import org.apache.hcatalog.data.transfer.WriteEntity; -import org.apache.hcatalog.data.transfer.WriterContext; - -public class DataWriterMaster { - - public static void main(String[] args) throws FileNotFoundException, IOException, ClassNotFoundException { - - // This config contains all the configuration that master node wants to provide - // to the HCatalog. 
- Properties externalConfigs = new Properties(); - externalConfigs.load(new FileReader(args[0])); - Map config = new HashMap(); - - for (Entry kv : externalConfigs.entrySet()) { - System.err.println("k: " + kv.getKey() + "\t v: " + kv.getValue()); - config.put((String) kv.getKey(), (String) kv.getValue()); - } - - if (args.length == 3 && "commit".equalsIgnoreCase(args[2])) { - // Then, master commits if everything goes well. - ObjectInputStream ois = new ObjectInputStream(new FileInputStream(new File(args[1]))); - WriterContext cntxt = (WriterContext) ois.readObject(); - commit(config, true, cntxt); - System.exit(0); - } - // This piece of code runs in master node and gets necessary context. - WriterContext cntxt = runsInMaster(config); - - - // Master node will serialize writercontext and will make it available at slaves. - File f = new File(args[1]); - f.delete(); - ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(f)); - oos.writeObject(cntxt); - oos.flush(); - oos.close(); - } - - private static WriterContext runsInMaster(Map config) throws HCatException { - - WriteEntity.Builder builder = new WriteEntity.Builder(); - WriteEntity entity = builder.withTable(config.get("table")).build(); - HCatWriter writer = DataTransferFactory.getHCatWriter(entity, config); - WriterContext info = writer.prepareWrite(); - return info; - } - - private static void commit(Map config, boolean status, WriterContext cntxt) throws HCatException { - - WriteEntity.Builder builder = new WriteEntity.Builder(); - WriteEntity entity = builder.withTable(config.get("table")).build(); - HCatWriter writer = DataTransferFactory.getHCatWriter(entity, config); - if (status) { - writer.commit(cntxt); - } else { - writer.abort(cntxt); - } - } -} diff --git hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/DataWriterSlave.java hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/DataWriterSlave.java deleted file mode 100644 index af826c2..0000000 --- 
hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/DataWriterSlave.java +++ /dev/null @@ -1,87 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.utils; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.FileReader; -import java.io.IOException; -import java.io.ObjectInputStream; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; - -import org.apache.hcatalog.data.DefaultHCatRecord; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.hcatalog.data.transfer.DataTransferFactory; -import org.apache.hcatalog.data.transfer.HCatWriter; -import org.apache.hcatalog.data.transfer.WriterContext; - -public class DataWriterSlave { - - public static void main(String[] args) throws FileNotFoundException, IOException, ClassNotFoundException { - - ObjectInputStream ois = new ObjectInputStream(new FileInputStream(args[0])); - WriterContext cntxt = (WriterContext) ois.readObject(); - ois.close(); - - HCatWriter writer = DataTransferFactory.getHCatWriter(cntxt); - writer.write(new HCatRecordItr(args[1])); - - } - - private static class 
HCatRecordItr implements Iterator { - - BufferedReader reader; - String curLine; - - public HCatRecordItr(String fileName) throws FileNotFoundException { - reader = new BufferedReader(new FileReader(new File(fileName))); - } - - @Override - public boolean hasNext() { - try { - curLine = reader.readLine(); - } catch (IOException e) { - e.printStackTrace(); - } - return null == curLine ? false : true; - } - - @Override - public HCatRecord next() { - - String[] fields = curLine.split("\t"); - List data = new ArrayList(3); - data.add(fields[0]); - data.add(Integer.parseInt(fields[1])); - data.add(Double.parseDouble(fields[2])); - return new DefaultHCatRecord(data); - } - - @Override - public void remove() { - // TODO Auto-generated method stub - - } - } -} diff --git hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/GroupByAge.java hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/GroupByAge.java deleted file mode 100644 index 3ff2441..0000000 --- hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/GroupByAge.java +++ /dev/null @@ -1,134 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.utils; - -import java.io.IOException; -import java.util.Iterator; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.conf.Configured; -import org.apache.hadoop.io.IntWritable; -import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.Mapper; -import org.apache.hadoop.mapreduce.Reducer; -import org.apache.hadoop.util.GenericOptionsParser; -import org.apache.hadoop.util.Tool; -import org.apache.hadoop.util.ToolRunner; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.data.DefaultHCatRecord; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.apache.hcatalog.mapreduce.HCatInputFormat; -import org.apache.hcatalog.mapreduce.HCatOutputFormat; -import org.apache.hcatalog.mapreduce.InputJobInfo; -import org.apache.hcatalog.mapreduce.OutputJobInfo; - -/** - * This is a map reduce test for testing hcat which goes against the "numbers" - * table. It performs a group by on the first column and a SUM operation on the - * other columns. 
This is to simulate a typical operation in a map reduce - * program to test that hcat hands the right data to the map reduce program - * - * Usage: hadoop jar sumnumbers <-libjars hive-hcat - * jar> The argument controls the output delimiter The hcat jar - * location should be specified as file:// - */ -public class GroupByAge extends Configured implements Tool { - - public static class Map extends - Mapper { - - int age; - - @Override - protected void map( - WritableComparable key, - HCatRecord value, - org.apache.hadoop.mapreduce.Mapper.Context context) - throws IOException, InterruptedException { - age = (Integer) value.get(1); - context.write(new IntWritable(age), new IntWritable(1)); - } - } - - public static class Reduce extends Reducer { - - - @Override - protected void reduce(IntWritable key, java.lang.Iterable - values, org.apache.hadoop.mapreduce.Reducer.Context context) - throws IOException, InterruptedException { - int sum = 0; - Iterator iter = values.iterator(); - while (iter.hasNext()) { - sum++; - iter.next(); - } - HCatRecord record = new DefaultHCatRecord(2); - record.set(0, key.get()); - record.set(1, sum); - - context.write(null, record); - } - } - - public int run(String[] args) throws Exception { - Configuration conf = getConf(); - args = new GenericOptionsParser(conf, args).getRemainingArgs(); - - String serverUri = args[0]; - String inputTableName = args[1]; - String outputTableName = args[2]; - String dbName = null; - - String principalID = System - .getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL); - if (principalID != null) - conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID); - Job job = new Job(conf, "GroupByAge"); - HCatInputFormat.setInput(job, InputJobInfo.create(dbName, - inputTableName, null)); - // initialize HCatOutputFormat - - job.setInputFormatClass(HCatInputFormat.class); - job.setJarByClass(GroupByAge.class); - job.setMapperClass(Map.class); - job.setReducerClass(Reduce.class); - 
job.setMapOutputKeyClass(IntWritable.class); - job.setMapOutputValueClass(IntWritable.class); - job.setOutputKeyClass(WritableComparable.class); - job.setOutputValueClass(DefaultHCatRecord.class); - HCatOutputFormat.setOutput(job, OutputJobInfo.create(dbName, - outputTableName, null)); - HCatSchema s = HCatOutputFormat.getTableSchema(job); - System.err.println("INFO: output schema explicitly set for writing:" - + s); - HCatOutputFormat.setSchema(job, s); - job.setOutputFormatClass(HCatOutputFormat.class); - return (job.waitForCompletion(true) ? 0 : 1); - } - - public static void main(String[] args) throws Exception { - int exitCode = ToolRunner.run(new GroupByAge(), args); - System.exit(exitCode); - } -} diff --git hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/HBaseReadWrite.java hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/HBaseReadWrite.java deleted file mode 100644 index d033a84..0000000 --- hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/HBaseReadWrite.java +++ /dev/null @@ -1,192 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.utils; - -import java.io.IOException; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.conf.Configured; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.Mapper; -import org.apache.hadoop.mapreduce.Reducer; -import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; -import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; -import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; -import org.apache.hadoop.util.GenericOptionsParser; -import org.apache.hadoop.util.Tool; -import org.apache.hadoop.util.ToolRunner; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.data.DefaultHCatRecord; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.hcatalog.mapreduce.HCatInputFormat; -import org.apache.hcatalog.mapreduce.HCatOutputFormat; -import org.apache.hcatalog.mapreduce.InputJobInfo; -import org.apache.hcatalog.mapreduce.OutputJobInfo; - -/** - * This is a map reduce test for testing hcat which goes against the "numbers" - * table. It performs a group by on the first column and a SUM operation on the - * other columns. 
This is to simulate a typical operation in a map reduce - * program to test that hcat hands the right data to the map reduce program - * - * Usage: hadoop jar sumnumbers <-libjars hive-hcat - * jar> The argument controls the output delimiter The hcat jar - * location should be specified as file:// - */ -public class HBaseReadWrite extends Configured implements Tool { - - public static class HBaseWriteMap extends - Mapper { - - String name; - String age; - String gpa; - - @Override - protected void map( - LongWritable key, - Text value, - org.apache.hadoop.mapreduce.Mapper.Context context) - throws IOException, InterruptedException { - String line = value.toString(); - String[] tokens = line.split("\t"); - name = tokens[0]; - - context.write(new Text(name), value); - } - } - - - public static class HBaseWriteReduce extends - Reducer { - - String name; - String age; - String gpa; - - @Override - protected void reduce(Text key, Iterable values, Context context) - throws IOException, InterruptedException { - name = key.toString(); - int count = 0; - double sum = 0; - for (Text value : values) { - String line = value.toString(); - String[] tokens = line.split("\t"); - name = tokens[0]; - age = tokens[1]; - gpa = tokens[2]; - - count++; - sum += Double.parseDouble(gpa.toString()); - } - - HCatRecord record = new DefaultHCatRecord(2); - record.set(0, name); - record.set(1, Double.toString(sum)); - - context.write(null, record); - } - } - - public static class HBaseReadMap extends - Mapper { - - String name; - String age; - String gpa; - - @Override - protected void map( - WritableComparable key, - HCatRecord value, - org.apache.hadoop.mapreduce.Mapper.Context context) - throws IOException, InterruptedException { - name = (String) value.get(0); - gpa = (String) value.get(1); - context.write(new Text(name), new Text(gpa)); - } - } - - - public int run(String[] args) throws Exception { - Configuration conf = getConf(); - args = new GenericOptionsParser(conf, 
args).getRemainingArgs(); - - String serverUri = args[0]; - String inputDir = args[1]; - String tableName = args[2]; - String outputDir = args[3]; - String dbName = null; - - String principalID = System - .getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL); - if (principalID != null) - conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID); - conf.set("hcat.hbase.output.bulkMode", "false"); - Job job = new Job(conf, "HBaseWrite"); - FileInputFormat.setInputPaths(job, inputDir); - - job.setInputFormatClass(TextInputFormat.class); - job.setOutputFormatClass(HCatOutputFormat.class); - job.setJarByClass(HBaseReadWrite.class); - job.setMapperClass(HBaseWriteMap.class); - job.setMapOutputKeyClass(Text.class); - job.setMapOutputValueClass(Text.class); - job.setReducerClass(HBaseWriteReduce.class); - job.setOutputKeyClass(WritableComparable.class); - job.setOutputValueClass(DefaultHCatRecord.class); - HCatOutputFormat.setOutput(job, OutputJobInfo.create(dbName, - tableName, null)); - - boolean succ = job.waitForCompletion(true); - - if (!succ) return 1; - - job = new Job(conf, "HBaseRead"); - HCatInputFormat.setInput(job, InputJobInfo.create(dbName, tableName, - null)); - - job.setInputFormatClass(HCatInputFormat.class); - job.setOutputFormatClass(TextOutputFormat.class); - job.setJarByClass(HBaseReadWrite.class); - job.setMapperClass(HBaseReadMap.class); - job.setOutputKeyClass(Text.class); - job.setOutputValueClass(Text.class); - job.setNumReduceTasks(0); - TextOutputFormat.setOutputPath(job, new Path(outputDir)); - - succ = job.waitForCompletion(true); - - if (!succ) return 2; - - return 0; - } - - public static void main(String[] args) throws Exception { - int exitCode = ToolRunner.run(new HBaseReadWrite(), args); - System.exit(exitCode); - } -} - diff --git hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/HCatTestDriver.java hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/HCatTestDriver.java deleted file mode 100644 index 
85056bb..0000000 --- hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/HCatTestDriver.java +++ /dev/null @@ -1,60 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.utils; - -import org.apache.hadoop.util.ProgramDriver; - -/** - * A description of an example program based on its class and a - * human-readable description. 
- */ -public class HCatTestDriver { - - public static void main(String argv[]) { - int exitCode = -1; - ProgramDriver pgd = new ProgramDriver(); - try { - pgd.addClass("typedatacheck", TypeDataCheck.class, - "A map/reduce program that checks the type of each field and" + - " outputs the entire table (to test hcat)."); - pgd.addClass("sumnumbers", SumNumbers.class, - "A map/reduce program that performs a group by on the first column and a " + - "SUM operation on the other columns of the \"numbers\" table."); - pgd.addClass("storenumbers", StoreNumbers.class, "A map/reduce program that " + - "reads from the \"numbers\" table and adds 10 to each fields and writes " + - "to the \"numbers_partitioned\" table into the datestamp=20100101 " + - "partition OR the \"numbers_empty_initially\" table based on a " + - "cmdline arg"); - pgd.addClass("storecomplex", StoreComplex.class, "A map/reduce program that " + - "reads from the \"complex\" table and stores as-is into the " + - "\"complex_empty_initially\" table."); - pgd.addClass("storedemo", StoreDemo.class, "demo prog."); - pgd.driver(argv); - - // Success - exitCode = 0; - } catch (Throwable e) { - e.printStackTrace(); - } - - System.exit(exitCode); - } -} - diff --git hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/HCatTypeCheck.java hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/HCatTypeCheck.java deleted file mode 100644 index bff8bb2..0000000 --- hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/HCatTypeCheck.java +++ /dev/null @@ -1,151 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.utils; - -import java.io.IOException; -import java.util.HashMap; -import java.util.Map; -import java.util.Map.Entry; - -import org.apache.pig.EvalFunc; -import org.apache.pig.data.DataBag; -import org.apache.pig.data.DataType; -import org.apache.pig.data.Tuple; -import org.apache.pig.impl.logicalLayer.schema.Schema; -import org.apache.pig.impl.util.Utils; - -/** - * This UDF can be used to check that a tuple presented by HCatLoader has the - * right types for the fields - * - * Usage is : - * - * register testudf.jar; - * a = load 'numbers' using HCatLoader(...); - * b = foreach a generate HCatTypeCheck('intnum1000:int,id:int,intnum5:int,intnum100:int,intnum:int,longnum:long,floatnum:float,doublenum:double', *); - * store b into 'output'; - * - * The schema string (the first argument to the UDF) is of the form one would provide in a - * pig load statement. - * - * The output should only contain the value '1' in all rows. 
(This UDF returns - * the integer value 1 if all fields have the right type, else throws IOException) - * - */ -public class HCatTypeCheck extends EvalFunc { - - static HashMap> typeMap = new HashMap>(); - - @Override - public Integer exec(Tuple input) throws IOException { - String schemaStr = (String) input.get(0); - Schema s = null; - try { - s = getSchemaFromString(schemaStr); - } catch (Exception e) { - throw new IOException(e); - } - for (int i = 0; i < s.size(); i++) { - check(s.getField(i).type, input.get(i + 1)); // input.get(i+1) since input.get(0) is the schema; - } - return 1; - } - - static { - typeMap.put(DataType.INTEGER, Integer.class); - typeMap.put(DataType.LONG, Long.class); - typeMap.put(DataType.FLOAT, Float.class); - typeMap.put(DataType.DOUBLE, Double.class); - typeMap.put(DataType.CHARARRAY, String.class); - typeMap.put(DataType.TUPLE, Tuple.class); - typeMap.put(DataType.MAP, Map.class); - typeMap.put(DataType.BAG, DataBag.class); - } - - - private void die(String expectedType, Object o) throws IOException { - throw new IOException("Expected " + expectedType + ", got " + - o.getClass().getName()); - } - - - private String check(Byte type, Object o) throws IOException { - if (o == null) { - return ""; - } - if (check(typeMap.get(type), o)) { - if (type.equals(DataType.MAP)) { - Map m = (Map) o; - check(m); - } else if (type.equals(DataType.BAG)) { - DataBag bg = (DataBag) o; - for (Tuple tuple : bg) { - Map m = (Map) tuple.get(0); - check(m); - } - } else if (type.equals(DataType.TUPLE)) { - Tuple t = (Tuple) o; - if (!check(Integer.class, t.get(0)) || - !check(String.class, t.get(1)) || - !check(Double.class, t.get(2))) { - die("t:tuple(num:int,str:string,dbl:double)", t); - } - } - } else { - die(typeMap.get(type).getName(), o); - } - return o.toString(); - } - - /** - * @param m - * @throws IOException - */ - private void check(Map m) throws IOException { - for (Entry e : m.entrySet()) { - // just access key and value to ensure they are 
correct - if (!check(String.class, e.getKey())) { - die("String", e.getKey()); - } - if (!check(String.class, e.getValue())) { - die("String", e.getValue()); - } - } - - } - - private boolean check(Class expected, Object actual) { - if (actual == null) { - return true; - } - return expected.isAssignableFrom(actual.getClass()); - } - - Schema getSchemaFromString(String schemaString) throws Exception { - /** ByteArrayInputStream stream = new ByteArrayInputStream(schemaString.getBytes()) ; - QueryParser queryParser = new QueryParser(stream) ; - Schema schema = queryParser.TupleSchema() ; - Schema.setSchemaDefaultType(schema, org.apache.pig.data.DataType.BYTEARRAY); - return schema; - */ - return Utils.getSchemaFromString(schemaString); - } - -} diff --git hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/HCatTypeCheckHive.java hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/HCatTypeCheckHive.java deleted file mode 100644 index 61b541e..0000000 --- hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/HCatTypeCheckHive.java +++ /dev/null @@ -1,142 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.utils; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; - -import org.apache.hadoop.hive.ql.exec.UDFArgumentException; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; -import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.StructField; -import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions; - -/** - * A hive udf to check types of the fields read from hcat. A sample hive query which can use this is: - * - * create temporary function typecheck as 'org.apache.hcatalog.utils.HCatTypeCheckHive'; - * select typecheck('map+struct+array>+int', - * mymap, mytuple, bagofmap, rownum) from complex; - * - * - * The first argument to the UDF is a string representing the schema of the columns in the table. - * The columns in the tables are the remaining args to it. - * The schema specification consists of the types as given by "describe
templeton.storage.classorg.apache.hcatalog.templeton.tool.ZooKeeperStorageorg.apache.hive.hcatalog.templeton.tool.ZooKeeperStorage The class to use as storage
hive.semantic.analyzer.factory.implorg.apache.hcatalog.cli.HCatSemanticAnalyzerFactoryorg.apache.hive.hcatalog.cli.HCatSemanticAnalyzerFactory
hive.metastore.warehouse.dir
" - * with each column's type separated from the next column's type by a '+' - * - * The UDF will throw an exception (and cause the query to fail) if it does not - * encounter the correct types. - * - * The output is a string representation of the data , type and hive category. - * It is not advisable to use this against large dataset since the output would also - * be large. - * - */ -public final class HCatTypeCheckHive extends GenericUDF { - - ObjectInspector[] argOIs; - - @Override - public Object evaluate(DeferredObject[] args) throws HiveException { - List row = new ArrayList(); - String typesStr = (String) getJavaObject(args[0].get(), argOIs[0], new ArrayList()); - String[] types = typesStr.split("\\+"); - for (int i = 0; i < types.length; i++) { - types[i] = types[i].toLowerCase(); - } - for (int i = 1; i < args.length; i++) { - ObjectInspector oi = argOIs[i]; - List categories = new ArrayList(); - Object o = getJavaObject(args[i].get(), oi, categories); - try { - if (o != null) { - Util.check(types[i - 1], o); - } - } catch (IOException e) { - throw new HiveException(e); - } - row.add(o == null ? "null" : o); - row.add(":" + (o == null ? 
"null" : o.getClass()) + ":" + categories); - } - return row.toString(); - } - - private Object getJavaObject(Object o, ObjectInspector oi, List categories) { - if (categories != null) { - categories.add(oi.getCategory()); - } - if (oi.getCategory() == ObjectInspector.Category.LIST) { - List l = ((ListObjectInspector) oi).getList(o); - List result = new ArrayList(); - ObjectInspector elemOI = ((ListObjectInspector) oi).getListElementObjectInspector(); - for (Object lo : l) { - result.add(getJavaObject(lo, elemOI, categories)); - } - return result; - } else if (oi.getCategory() == ObjectInspector.Category.MAP) { - Map m = ((MapObjectInspector) oi).getMap(o); - Map result = new HashMap(); - ObjectInspector koi = ((MapObjectInspector) oi).getMapKeyObjectInspector(); - ObjectInspector voi = ((MapObjectInspector) oi).getMapValueObjectInspector(); - for (Entry e : m.entrySet()) { - result.put((String) getJavaObject(e.getKey(), koi, null), - (String) getJavaObject(e.getValue(), voi, null)); - } - return result; - - } else if (oi.getCategory() == ObjectInspector.Category.STRUCT) { - List s = ((StructObjectInspector) oi).getStructFieldsDataAsList(o); - List sf = ((StructObjectInspector) oi).getAllStructFieldRefs(); - List result = new ArrayList(); - for (int i = 0; i < s.size(); i++) { - result.add(getJavaObject(s.get(i), sf.get(i).getFieldObjectInspector(), categories)); - } - return result; - } else if (oi.getCategory() == ObjectInspector.Category.PRIMITIVE) { - return ((PrimitiveObjectInspector) oi).getPrimitiveJavaObject(o); - } - throw new RuntimeException("Unexpected error!"); - } - - @Override - public String getDisplayString(String[] arg0) { - return null; - } - - @Override - public ObjectInspector initialize(ObjectInspector[] argOIs) - throws UDFArgumentException { - this.argOIs = argOIs; - return ObjectInspectorFactory.getReflectionObjectInspector(String.class, - ObjectInspectorOptions.JAVA); - } - -} diff --git 
hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/ReadJson.java hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/ReadJson.java deleted file mode 100644 index 998bfb8..0000000 --- hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/ReadJson.java +++ /dev/null @@ -1,112 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.utils; - -import java.io.IOException; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.conf.Configured; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.IntWritable; -import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.Mapper; -import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; -import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; -import org.apache.hadoop.util.GenericOptionsParser; -import org.apache.hadoop.util.Tool; -import org.apache.hadoop.util.ToolRunner; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.data.DefaultHCatRecord; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.hcatalog.mapreduce.HCatInputFormat; -import org.apache.hcatalog.mapreduce.InputJobInfo; - -/** - * This is a map reduce test for testing hcat which goes against the "numbers" - * table. It performs a group by on the first column and a SUM operation on the - * other columns. This is to simulate a typical operation in a map reduce program - * to test that hcat hands the right data to the map reduce program - * - * Usage: hadoop jar sumnumbers <-libjars hive-hcat jar> - The argument controls the output delimiter - The hcat jar location should be specified as file:// - */ -public class ReadJson extends Configured implements Tool { - - public static class Map - extends Mapper { - - String s; - Integer i; - Double d; - - @Override - protected void map(WritableComparable key, HCatRecord value, - org.apache.hadoop.mapreduce.Mapper.Context context) - throws IOException, InterruptedException { - s = value.get(0) == null ? null : (String) value.get(0); - i = value.get(1) == null ? null : (Integer) value.get(1); - d = value.get(2) == null ? 
null : (Double) value.get(2); - - HCatRecord record = new DefaultHCatRecord(3); - record.set(0, s); - record.set(1, i); - record.set(2, d); - - context.write(null, record); - - } - } - - public int run(String[] args) throws Exception { - Configuration conf = getConf(); - args = new GenericOptionsParser(conf, args).getRemainingArgs(); - - String serverUri = args[0]; - String tableName = args[1]; - String outputDir = args[2]; - String dbName = null; - - String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL); - if (principalID != null) - conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID); - Job job = new Job(conf, "ReadJson"); - HCatInputFormat.setInput(job, InputJobInfo.create( - dbName, tableName, null)); - // initialize HCatOutputFormat - - job.setInputFormatClass(HCatInputFormat.class); - job.setOutputFormatClass(TextOutputFormat.class); - job.setJarByClass(ReadJson.class); - job.setMapperClass(Map.class); - job.setOutputKeyClass(IntWritable.class); - job.setOutputValueClass(HCatRecord.class); - job.setNumReduceTasks(0); - FileOutputFormat.setOutputPath(job, new Path(outputDir)); - return (job.waitForCompletion(true) ? 0 : 1); - } - - public static void main(String[] args) throws Exception { - int exitCode = ToolRunner.run(new ReadJson(), args); - System.exit(exitCode); - } -} diff --git hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/ReadRC.java hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/ReadRC.java deleted file mode 100644 index 83f8cc2..0000000 --- hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/ReadRC.java +++ /dev/null @@ -1,113 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.utils; - -import java.io.IOException; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.conf.Configured; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.IntWritable; -import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.Mapper; -import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; -import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; -import org.apache.hadoop.util.GenericOptionsParser; -import org.apache.hadoop.util.Tool; -import org.apache.hadoop.util.ToolRunner; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.data.DefaultHCatRecord; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.hcatalog.mapreduce.HCatInputFormat; -import org.apache.hcatalog.mapreduce.InputJobInfo; - -/** - * This is a map reduce test for testing hcat which goes against the "numbers" - * table. It performs a group by on the first column and a SUM operation on the - * other columns. 
This is to simulate a typical operation in a map reduce program - * to test that hcat hands the right data to the map reduce program - * - * Usage: hadoop jar sumnumbers <-libjars hive-hcat jar> - The argument controls the output delimiter - The hcat jar location should be specified as file:// - */ -public class ReadRC extends Configured implements Tool { - - public static class Map - extends Mapper { - - String name; - int age; - double gpa; - - @Override - protected void map(WritableComparable key, HCatRecord value, - org.apache.hadoop.mapreduce.Mapper.Context context) - throws IOException, InterruptedException { - name = (String) value.get(0); - age = (Integer) value.get(1); - gpa = (Double) value.get(2); - gpa = Math.floor(gpa) + 0.1; - - HCatRecord record = new DefaultHCatRecord(3); - record.set(0, name); - record.set(1, age); - record.set(2, gpa); - - context.write(null, record); - - } - } - - public int run(String[] args) throws Exception { - Configuration conf = getConf(); - args = new GenericOptionsParser(conf, args).getRemainingArgs(); - - String serverUri = args[0]; - String tableName = args[1]; - String outputDir = args[2]; - String dbName = null; - - String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL); - if (principalID != null) - conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID); - Job job = new Job(conf, "ReadRC"); - HCatInputFormat.setInput(job, InputJobInfo.create( - dbName, tableName, null)); - // initialize HCatOutputFormat - - job.setInputFormatClass(HCatInputFormat.class); - job.setOutputFormatClass(TextOutputFormat.class); - job.setJarByClass(ReadRC.class); - job.setMapperClass(Map.class); - job.setOutputKeyClass(IntWritable.class); - job.setOutputValueClass(HCatRecord.class); - job.setNumReduceTasks(0); - FileOutputFormat.setOutputPath(job, new Path(outputDir)); - return (job.waitForCompletion(true) ? 
0 : 1); - } - - public static void main(String[] args) throws Exception { - int exitCode = ToolRunner.run(new ReadRC(), args); - System.exit(exitCode); - } -} diff --git hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/ReadText.java hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/ReadText.java deleted file mode 100644 index be1d5b6..0000000 --- hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/ReadText.java +++ /dev/null @@ -1,124 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.utils; - -import java.io.IOException; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.conf.Configured; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.IntWritable; -import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.Mapper; -import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; -import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; -import org.apache.hadoop.util.GenericOptionsParser; -import org.apache.hadoop.util.Tool; -import org.apache.hadoop.util.ToolRunner; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.data.DefaultHCatRecord; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.hcatalog.mapreduce.HCatInputFormat; -import org.apache.hcatalog.mapreduce.InputJobInfo; - -/** - * This is a map reduce test for testing hcat which goes against the "numbers" - * table. It performs a group by on the first column and a SUM operation on the - * other columns. 
This is to simulate a typical operation in a map reduce program - * to test that hcat hands the right data to the map reduce program - * - * Usage: hadoop jar sumnumbers <-libjars hive-hcat jar> - The argument controls the output delimiter - The hcat jar location should be specified as file:// - */ -public class ReadText extends Configured implements Tool { - - public static class Map - extends Mapper { - - byte t; - short si; - int i; - long b; - float f; - double d; - String s; - - @Override - protected void map(WritableComparable key, HCatRecord value, - org.apache.hadoop.mapreduce.Mapper.Context context) - throws IOException, InterruptedException { - t = (Byte) value.get(0); - si = (Short) value.get(1); - i = (Integer) value.get(2); - b = (Long) value.get(3); - f = (Float) value.get(4); - d = (Double) value.get(5); - s = (String) value.get(6); - - HCatRecord record = new DefaultHCatRecord(7); - record.set(0, t); - record.set(1, si); - record.set(2, i); - record.set(3, b); - record.set(4, f); - record.set(5, d); - record.set(6, s); - - context.write(null, record); - - } - } - - public int run(String[] args) throws Exception { - Configuration conf = getConf(); - args = new GenericOptionsParser(conf, args).getRemainingArgs(); - - String serverUri = args[0]; - String tableName = args[1]; - String outputDir = args[2]; - String dbName = null; - - String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL); - if (principalID != null) - conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID); - Job job = new Job(conf, "ReadText"); - HCatInputFormat.setInput(job, InputJobInfo.create( - dbName, tableName, null)); - // initialize HCatOutputFormat - - job.setInputFormatClass(HCatInputFormat.class); - job.setOutputFormatClass(TextOutputFormat.class); - job.setJarByClass(ReadText.class); - job.setMapperClass(Map.class); - job.setOutputKeyClass(IntWritable.class); - job.setOutputValueClass(HCatRecord.class); - job.setNumReduceTasks(0); - 
FileOutputFormat.setOutputPath(job, new Path(outputDir)); - return (job.waitForCompletion(true) ? 0 : 1); - } - - public static void main(String[] args) throws Exception { - int exitCode = ToolRunner.run(new ReadText(), args); - System.exit(exitCode); - } -} diff --git hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/ReadWrite.java hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/ReadWrite.java deleted file mode 100644 index d375cc2..0000000 --- hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/ReadWrite.java +++ /dev/null @@ -1,112 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.utils; - -import java.io.IOException; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.conf.Configured; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.Mapper; -import org.apache.hadoop.util.GenericOptionsParser; -import org.apache.hadoop.util.Tool; -import org.apache.hadoop.util.ToolRunner; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.data.DefaultHCatRecord; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.apache.hcatalog.mapreduce.HCatInputFormat; -import org.apache.hcatalog.mapreduce.HCatOutputFormat; -import org.apache.hcatalog.mapreduce.InputJobInfo; -import org.apache.hcatalog.mapreduce.OutputJobInfo; - -/** - * This is a map reduce test for testing hcat which goes against the "numbers" - * table. It performs a group by on the first column and a SUM operation on the - * other columns. 
This is to simulate a typical operation in a map reduce - * program to test that hcat hands the right data to the map reduce program - * - * Usage: hadoop jar sumnumbers <-libjars hive-hcat - * jar> The argument controls the output delimiter The hcat jar - * location should be specified as file:// - */ -public class ReadWrite extends Configured implements Tool { - - public static class Map extends - Mapper { - - String name; - int age; - double gpa; - - @Override - protected void map( - WritableComparable key, - HCatRecord value, - org.apache.hadoop.mapreduce.Mapper.Context context) - throws IOException, InterruptedException { - name = (String) value.get(0); - age = (Integer) value.get(1); - gpa = (Double) value.get(2); - context.write(new Text(name), value); - - } - } - - public int run(String[] args) throws Exception { - Configuration conf = getConf(); - args = new GenericOptionsParser(conf, args).getRemainingArgs(); - - String serverUri = args[0]; - String inputTableName = args[1]; - String outputTableName = args[2]; - String dbName = null; - - String principalID = System - .getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL); - if (principalID != null) - conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID); - Job job = new Job(conf, "ReadWrite"); - HCatInputFormat.setInput(job, InputJobInfo.create(dbName, - inputTableName, null)); - // initialize HCatOutputFormat - - job.setInputFormatClass(HCatInputFormat.class); - job.setJarByClass(ReadWrite.class); - job.setMapperClass(Map.class); - job.setOutputKeyClass(Text.class); - job.setOutputValueClass(DefaultHCatRecord.class); - HCatOutputFormat.setOutput(job, OutputJobInfo.create(dbName, - outputTableName, null)); - HCatSchema s = HCatInputFormat.getTableSchema(job); - System.err.println("INFO: output schema explicitly set for writing:" - + s); - HCatOutputFormat.setSchema(job, s); - job.setOutputFormatClass(HCatOutputFormat.class); - return (job.waitForCompletion(true) ? 
0 : 1); - } - - public static void main(String[] args) throws Exception { - int exitCode = ToolRunner.run(new ReadWrite(), args); - System.exit(exitCode); - } -} diff --git hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/SimpleRead.java hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/SimpleRead.java deleted file mode 100644 index 365f577..0000000 --- hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/SimpleRead.java +++ /dev/null @@ -1,108 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.utils; - -import java.io.IOException; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.conf.Configured; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.IntWritable; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.Mapper; -import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; -import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; -import org.apache.hadoop.util.GenericOptionsParser; -import org.apache.hadoop.util.Tool; -import org.apache.hadoop.util.ToolRunner; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.hcatalog.mapreduce.HCatInputFormat; -import org.apache.hcatalog.mapreduce.InputJobInfo; - -/** - * This is a map reduce test for testing hcat which goes against the "numbers" - * table. It performs a group by on the first column and a SUM operation on the - * other columns. 
This is to simulate a typical operation in a map reduce program - * to test that hcat hands the right data to the map reduce program - * - * Usage: hadoop jar sumnumbers <-libjars hive-hcat jar> - The argument controls the output delimiter - The hcat jar location should be specified as file:// - */ -public class SimpleRead extends Configured implements Tool { - - private static final String TABLE_NAME = "studenttab10k"; - private static final String TAB = "\t"; - - public static class Map - extends Mapper { - - String name; - int age; - double gpa; - - @Override - protected void map(WritableComparable key, HCatRecord value, - org.apache.hadoop.mapreduce.Mapper.Context context) - throws IOException, InterruptedException { - name = (String) value.get(0); - age = (Integer) value.get(1); - gpa = (Double) value.get(2); - context.write(new Text(name), new IntWritable(age)); - - } - } - - public int run(String[] args) throws Exception { - Configuration conf = getConf(); - args = new GenericOptionsParser(conf, args).getRemainingArgs(); - - String serverUri = args[0]; - String tableName = args[1]; - String outputDir = args[2]; - String dbName = null; - - String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL); - if (principalID != null) - conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID); - Job job = new Job(conf, "SimpleRead"); - HCatInputFormat.setInput(job, InputJobInfo.create( - dbName, tableName, null)); - // initialize HCatOutputFormat - - job.setInputFormatClass(HCatInputFormat.class); - job.setOutputFormatClass(TextOutputFormat.class); - job.setJarByClass(SimpleRead.class); - job.setMapperClass(Map.class); - job.setOutputKeyClass(Text.class); - job.setOutputValueClass(IntWritable.class); - FileOutputFormat.setOutputPath(job, new Path(outputDir)); - return (job.waitForCompletion(true) ? 
0 : 1); - } - - public static void main(String[] args) throws Exception { - int exitCode = ToolRunner.run(new SimpleRead(), args); - System.exit(exitCode); - } -} diff --git hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/StoreComplex.java hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/StoreComplex.java deleted file mode 100644 index 6db720a..0000000 --- hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/StoreComplex.java +++ /dev/null @@ -1,136 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.utils; - -import java.io.IOException; -import java.util.HashMap; -import java.util.Map; -import java.util.Random; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.io.IntWritable; -import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.Mapper; -import org.apache.hadoop.util.GenericOptionsParser; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.data.DefaultHCatRecord; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.apache.hcatalog.mapreduce.HCatInputFormat; -import org.apache.hcatalog.mapreduce.HCatOutputFormat; -import org.apache.hcatalog.mapreduce.InputJobInfo; -import org.apache.hcatalog.mapreduce.OutputJobInfo; - -/** - * This is a map reduce test for testing hcat which goes against the "complex" - * table and writes to "complex_nopart_empty_initially" table. It reads data from complex which - * is an unpartitioned table and stores the data as-is into complex_empty_initially table - * (which is also unpartitioned) - * - * Usage: hadoop jar testudf.jar storecomplex <-libjars hive-hcat jar> - The hcat jar location should be specified as file:// - */ -public class StoreComplex { - - private static final String COMPLEX_TABLE_NAME = "complex"; - private static final String COMPLEX_NOPART_EMPTY_INITIALLY_TABLE_NAME = "complex_nopart_empty_initially"; - - - public static class ComplexMapper - extends Mapper { - - @Override - protected void map(WritableComparable key, HCatRecord value, - org.apache.hadoop.mapreduce.Mapper.Context context) - throws IOException, InterruptedException { - // just write out the value as-is - context.write(new IntWritable(0), value); - - } - } - - - public static void main(String[] args) throws Exception { - Configuration conf = new Configuration(); - args = new GenericOptionsParser(conf, args).getRemainingArgs(); - String[] 
otherArgs = new String[1]; - int j = 0; - for (int i = 0; i < args.length; i++) { - if (args[i].equals("-libjars")) { - // generic options parser doesn't seem to work! - conf.set("tmpjars", args[i + 1]); - i = i + 1; // skip it , the for loop will skip its value - } else { - otherArgs[j++] = args[i]; - } - } - if (otherArgs.length != 1) { - usage(); - } - String serverUri = otherArgs[0]; - String tableName = COMPLEX_TABLE_NAME; - String dbName = "default"; - Map outputPartitionKvps = new HashMap(); - String outputTableName = null; - outputTableName = COMPLEX_NOPART_EMPTY_INITIALLY_TABLE_NAME; - // test with null or empty randomly - if (new Random().nextInt(2) == 0) { - System.err.println("INFO: output partition keys set to null for writing"); - outputPartitionKvps = null; - } - String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL); - if (principalID != null) - conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID); - Job job = new Job(conf, "storecomplex"); - // initialize HCatInputFormat - - HCatInputFormat.setInput(job, InputJobInfo.create( - dbName, tableName, null)); - // initialize HCatOutputFormat - HCatOutputFormat.setOutput(job, OutputJobInfo.create( - dbName, outputTableName, outputPartitionKvps)); - - - HCatSchema s = HCatInputFormat.getTableSchema(job); - HCatOutputFormat.setSchema(job, s); - job.setInputFormatClass(HCatInputFormat.class); - job.setOutputFormatClass(HCatOutputFormat.class); - job.setJarByClass(StoreComplex.class); - job.setMapperClass(ComplexMapper.class); - job.setOutputKeyClass(IntWritable.class); - job.setOutputValueClass(DefaultHCatRecord.class); - System.exit(job.waitForCompletion(true) ? 
0 : 1); - } - - - /** - * - */ - private static void usage() { - System.err.println("Usage: hadoop jar testudf.jar storecomplex <-libjars hive-hcat jar>\n" + - "The hcat jar location should be specified as file://\n"); - System.exit(2); - - } - - -} diff --git hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/StoreDemo.java hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/StoreDemo.java deleted file mode 100644 index cc2e0d7..0000000 --- hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/StoreDemo.java +++ /dev/null @@ -1,153 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.utils; - -import java.io.IOException; -import java.util.HashMap; -import java.util.Map; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.io.IntWritable; -import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.Mapper; -import org.apache.hadoop.util.GenericOptionsParser; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.data.DefaultHCatRecord; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.apache.hcatalog.mapreduce.HCatInputFormat; -import org.apache.hcatalog.mapreduce.HCatOutputFormat; -import org.apache.hcatalog.mapreduce.InputJobInfo; -import org.apache.hcatalog.mapreduce.OutputJobInfo; - -/** - * This is a map reduce test for testing hcat which goes against the "numbers" - * table and writes data to another table. It reads data from numbers which - * is an unpartitioned table and adds 10 to each field. It stores the result into - * the datestamp='20100101' partition of the numbers_part_empty_initially table if the second - * command line arg is "part". If the second cmdline arg is "nopart" then the - * result is stored into the 'numbers_nopart_empty_initially' (unpartitioned) table. - * If the second cmdline arg is "nopart_pig", then the result is stored into the - * 'numbers_nopart_pig_empty_initially' (unpartitioned) table with the tinyint - * and smallint columns in "numbers" being stored as "int" (since pig cannot handle - * tinyint and smallint) - * - * Usage: hadoop jar storenumbers <-libjars hive-hcat jar> - If the second argument is "part" data is written to datestamp = '2010101' partition of the numbers_part_empty_initially table. - If the second argument is "nopart", data is written to the unpartitioned numbers_nopart_empty_initially table. 
- If the second argument is "nopart_pig", data is written to the unpartitioned numbers_nopart_pig_empty_initially table. - The hcat jar location should be specified as file:// - */ -public class StoreDemo { - - private static final String NUMBERS_PARTITIONED_TABLE_NAME = "demo_partitioned"; - private static final String NUMBERS_TABLE_NAME = "demo"; - - public static class SumMapper - extends Mapper { - - - Integer intnum; - - Double doublenum; - - @Override - protected void map(WritableComparable key, HCatRecord value, - org.apache.hadoop.mapreduce.Mapper.Context context) - throws IOException, InterruptedException { - intnum = ((Integer) value.get(0)); - value.set(0, intnum + 20); - doublenum = ((Double) value.get(1)); - value.set(1, (Double) (doublenum + 20)); - context.write(new IntWritable(0), value); - - } - } - - - public static void main(String[] args) throws Exception { - Configuration conf = new Configuration(); - args = new GenericOptionsParser(conf, args).getRemainingArgs(); - String[] otherArgs = new String[1]; - int j = 0; - for (int i = 0; i < args.length; i++) { - if (args[i].equals("-libjars")) { - // generic options parser doesn't seem to work! 
- conf.set("tmpjars", args[i + 1]); - i = i + 1; // skip it , the for loop will skip its value - } else { - otherArgs[j++] = args[i]; - } - } - if (otherArgs.length != 1) { - usage(); - } - String serverUri = otherArgs[0]; - - String tableName = NUMBERS_TABLE_NAME; - String dbName = "default"; - Map outputPartitionKvps = new HashMap(); - String outputTableName = NUMBERS_PARTITIONED_TABLE_NAME; - outputPartitionKvps.put("datestamp", "20100102"); - - String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL); - if (principalID != null) - conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID); - Job job = new Job(conf, "storedemo"); - // initialize HCatInputFormat - HCatInputFormat.setInput(job, InputJobInfo.create( - dbName, tableName, null)); - // initialize HCatOutputFormat - HCatOutputFormat.setOutput(job, OutputJobInfo.create( - dbName, outputTableName, outputPartitionKvps)); - // test with and without specifying schema randomly - HCatSchema s = HCatInputFormat.getTableSchema(job); - System.err.println("INFO: output schema explicitly set for writing:" + s); - HCatOutputFormat.setSchema(job, s); - - job.setInputFormatClass(HCatInputFormat.class); - job.setOutputFormatClass(HCatOutputFormat.class); - job.setJarByClass(StoreDemo.class); - job.setMapperClass(SumMapper.class); - job.setOutputKeyClass(IntWritable.class); - job.setNumReduceTasks(0); - job.setOutputValueClass(DefaultHCatRecord.class); - System.exit(job.waitForCompletion(true) ? 
0 : 1); - } - - - /** - * - */ - private static void usage() { - System.err.println("Usage: hadoop jar storenumbers <-libjars hive-hcat jar>\n" + - "\tIf the second argument is \"part\" data is written to datestamp = '2010101' partition of " + - "the numbers_part_empty_initially table.\n\tIf the second argument is \"nopart\", data is written to " + - "the unpartitioned numbers_nopart_empty_initially table.\n\tIf the second argument is \"nopart_pig\", " + - "data is written to the unpartitioned numbers_nopart_pig_empty_initially table.\nt" + - "The hcat jar location should be specified as file://\n"); - System.exit(2); - - } - - -} diff --git hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/StoreNumbers.java hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/StoreNumbers.java deleted file mode 100644 index f9567a1..0000000 --- hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/StoreNumbers.java +++ /dev/null @@ -1,233 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.utils; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Random; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.io.IntWritable; -import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.Mapper; -import org.apache.hadoop.util.GenericOptionsParser; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.data.DefaultHCatRecord; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.hcatalog.data.schema.HCatFieldSchema; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.apache.hcatalog.mapreduce.HCatInputFormat; -import org.apache.hcatalog.mapreduce.HCatOutputFormat; -import org.apache.hcatalog.mapreduce.InputJobInfo; -import org.apache.hcatalog.mapreduce.OutputJobInfo; - -/** - * This is a map reduce test for testing hcat which goes against the "numbers" - * table and writes data to another table. It reads data from numbers which - * is an unpartitioned table and adds 10 to each field. It stores the result into - * the datestamp='20100101' partition of the numbers_part_empty_initially table if the second - * command line arg is "part". If the second cmdline arg is "nopart" then the - * result is stored into the 'numbers_nopart_empty_initially' (unpartitioned) table. - * If the second cmdline arg is "nopart_pig", then the result is stored into the - * 'numbers_nopart_pig_empty_initially' (unpartitioned) table with the tinyint - * and smallint columns in "numbers" being stored as "int" (since pig cannot handle - * tinyint and smallint) - * - * Usage: hadoop jar storenumbers <-libjars hive-hcat jar> - If the second argument is "part" data is written to datestamp = '2010101' partition of the numbers_part_empty_initially table. 
- If the second argument is "nopart", data is written to the unpartitioned numbers_nopart_empty_initially table. - If the second argument is "nopart_pig", data is written to the unpartitioned numbers_nopart_pig_empty_initially table. - The hcat jar location should be specified as file:// - */ -public class StoreNumbers { - - private static final String NUMBERS_PARTITIONED_TABLE_NAME = "numbers_part_empty_initially"; - private static final String NUMBERS_TABLE_NAME = "numbers"; - private static final String NUMBERS_NON_PARTITIONED_TABLE_NAME = "numbers_nopart_empty_initially"; - private static final String NUMBERS_NON_PARTITIONED_PIG_TABLE_NAME = "numbers_nopart_pig_empty_initially"; - private static final String IS_PIG_NON_PART_TABLE = "is.pig.non.part.table"; - - public static class SumMapper - extends Mapper { - - Integer intnum1000; - // though id is given as a Short by hcat, the map will emit it as an - // IntWritable so we can just sum in the reduce - Short id; - - // though intnum5 is handed as a Byte by hcat, the map() will emit it as - // an IntWritable so we can just sum in the reduce - Byte intnum5; - Integer intnum100; - Integer intnum; - Long longnum; - Float floatnum; - Double doublenum; - - @Override - protected void map(WritableComparable key, HCatRecord value, - org.apache.hadoop.mapreduce.Mapper.Context context) - throws IOException, InterruptedException { - boolean isnoPartPig = context.getConfiguration().getBoolean(IS_PIG_NON_PART_TABLE, false); - intnum1000 = ((Integer) value.get(0)); - id = ((Short) value.get(1)); - intnum5 = (((Byte) value.get(2))); - intnum100 = (((Integer) value.get(3))); - intnum = ((Integer) value.get(4)); - longnum = ((Long) value.get(5)); - floatnum = ((Float) value.get(6)); - doublenum = ((Double) value.get(7)); - HCatRecord output = new DefaultHCatRecord(8); - output.set(0, intnum1000 + 10); - if (isnoPartPig) { - output.set(1, ((int) (id + 10))); - } else { - output.set(1, ((short) (id + 10))); - } - if (isnoPartPig) 
{ - output.set(2, (int) (intnum5 + 10)); - } else { - output.set(2, (byte) (intnum5 + 10)); - } - - output.set(3, intnum100 + 10); - output.set(4, intnum + 10); - output.set(5, (long) (longnum + 10)); - output.set(6, (float) (floatnum + 10)); - output.set(7, (double) (doublenum + 10)); - for (int i = 0; i < 8; i++) { - System.err.println("XXX: class:" + output.get(i).getClass()); - } - context.write(new IntWritable(0), output); - - } - } - - - public static void main(String[] args) throws Exception { - Configuration conf = new Configuration(); - args = new GenericOptionsParser(conf, args).getRemainingArgs(); - String[] otherArgs = new String[2]; - int j = 0; - for (int i = 0; i < args.length; i++) { - if (args[i].equals("-libjars")) { - // generic options parser doesn't seem to work! - conf.set("tmpjars", args[i + 1]); - i = i + 1; // skip it , the for loop will skip its value - } else { - otherArgs[j++] = args[i]; - } - } - if (otherArgs.length != 2) { - usage(); - } - String serverUri = otherArgs[0]; - if (otherArgs[1] == null || ( - !otherArgs[1].equalsIgnoreCase("part") && !otherArgs[1].equalsIgnoreCase("nopart")) - && !otherArgs[1].equalsIgnoreCase("nopart_pig")) { - usage(); - } - boolean writeToPartitionedTable = (otherArgs[1].equalsIgnoreCase("part")); - boolean writeToNonPartPigTable = (otherArgs[1].equalsIgnoreCase("nopart_pig")); - String tableName = NUMBERS_TABLE_NAME; - String dbName = "default"; - Map outputPartitionKvps = new HashMap(); - String outputTableName = null; - conf.set(IS_PIG_NON_PART_TABLE, "false"); - if (writeToPartitionedTable) { - outputTableName = NUMBERS_PARTITIONED_TABLE_NAME; - outputPartitionKvps.put("datestamp", "20100101"); - } else { - if (writeToNonPartPigTable) { - conf.set(IS_PIG_NON_PART_TABLE, "true"); - outputTableName = NUMBERS_NON_PARTITIONED_PIG_TABLE_NAME; - } else { - outputTableName = NUMBERS_NON_PARTITIONED_TABLE_NAME; - } - // test with null or empty randomly - if (new Random().nextInt(2) == 0) { - 
outputPartitionKvps = null; - } - } - - String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL); - if (principalID != null) - conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID); - Job job = new Job(conf, "storenumbers"); - - // initialize HCatInputFormat - HCatInputFormat.setInput(job, InputJobInfo.create( - dbName, tableName, null)); - // initialize HCatOutputFormat - HCatOutputFormat.setOutput(job, OutputJobInfo.create( - dbName, outputTableName, outputPartitionKvps)); - // test with and without specifying schema randomly - HCatSchema s = HCatInputFormat.getTableSchema(job); - if (writeToNonPartPigTable) { - List newHfsList = new ArrayList(); - // change smallint and tinyint to int - for (HCatFieldSchema hfs : s.getFields()) { - if (hfs.getTypeString().equals("smallint")) { - newHfsList.add(new HCatFieldSchema(hfs.getName(), - HCatFieldSchema.Type.INT, hfs.getComment())); - } else if (hfs.getTypeString().equals("tinyint")) { - newHfsList.add(new HCatFieldSchema(hfs.getName(), - HCatFieldSchema.Type.INT, hfs.getComment())); - } else { - newHfsList.add(hfs); - } - } - s = new HCatSchema(newHfsList); - } - HCatOutputFormat.setSchema(job, s); - - - job.setInputFormatClass(HCatInputFormat.class); - job.setOutputFormatClass(HCatOutputFormat.class); - job.setJarByClass(StoreNumbers.class); - job.setMapperClass(SumMapper.class); - job.setOutputKeyClass(IntWritable.class); - job.setNumReduceTasks(0); - job.setOutputValueClass(DefaultHCatRecord.class); - System.exit(job.waitForCompletion(true) ? 
0 : 1); - } - - - /** - * - */ - private static void usage() { - System.err.println("Usage: hadoop jar storenumbers <-libjars hive-hcat jar>\n" + - "\tIf the second argument is \"part\" data is written to datestamp = '2010101' partition of " + - "the numbers_part_empty_initially table.\n\tIf the second argument is \"nopart\", data is written to " + - "the unpartitioned numbers_nopart_empty_initially table.\n\tIf the second argument is \"nopart_pig\", " + - "data is written to the unpartitioned numbers_nopart_pig_empty_initially table.\nt" + - "The hcat jar location should be specified as file://\n"); - System.exit(2); - - } - - -} diff --git hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/SumNumbers.java hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/SumNumbers.java deleted file mode 100644 index 0a451ae..0000000 --- hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/SumNumbers.java +++ /dev/null @@ -1,258 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.utils; - -import java.io.DataInput; -import java.io.DataOutput; -import java.io.IOException; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.DoubleWritable; -import org.apache.hadoop.io.FloatWritable; -import org.apache.hadoop.io.IntWritable; -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.Mapper; -import org.apache.hadoop.mapreduce.Reducer; -import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; -import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; -import org.apache.hadoop.util.GenericOptionsParser; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.hcatalog.mapreduce.HCatInputFormat; -import org.apache.hcatalog.mapreduce.InputJobInfo; - -/** - * This is a map reduce test for testing hcat which goes against the "numbers" - * table. It performs a group by on the first column and a SUM operation on the - * other columns. 
This is to simulate a typical operation in a map reduce program - * to test that hcat hands the right data to the map reduce program - * - * Usage: hadoop jar sumnumbers <-libjars hive-hcat jar> - The argument controls the output delimiter - The hcat jar location should be specified as file:// - */ -public class SumNumbers { - - private static final String NUMBERS_TABLE_NAME = "numbers"; - private static final String TAB = "\t"; - - public static class SumMapper - extends Mapper { - - IntWritable intnum1000; - // though id is given as a Short by hcat, the map will emit it as an - // IntWritable so we can just sum in the reduce - IntWritable id; - - // though intnum5 is handed as a Byte by hcat, the map() will emit it as - // an IntWritable so we can just sum in the reduce - IntWritable intnum5; - IntWritable intnum100; - IntWritable intnum; - LongWritable longnum; - FloatWritable floatnum; - DoubleWritable doublenum; - - @Override - protected void map(WritableComparable key, HCatRecord value, - org.apache.hadoop.mapreduce.Mapper.Context context) - throws IOException, InterruptedException { - intnum1000 = new IntWritable((Integer) value.get(0)); - id = new IntWritable((Short) value.get(1)); - intnum5 = new IntWritable(((Byte) value.get(2))); - intnum100 = new IntWritable(((Integer) value.get(3))); - intnum = new IntWritable((Integer) value.get(4)); - longnum = new LongWritable((Long) value.get(5)); - floatnum = new FloatWritable((Float) value.get(6)); - doublenum = new DoubleWritable((Double) value.get(7)); - SumNumbers.ArrayWritable outputValue = new SumNumbers.ArrayWritable(id, - intnum5, intnum100, intnum, longnum, floatnum, doublenum); - context.write(intnum1000, outputValue); - - } - } - - public static class SumReducer extends Reducer { - - - LongWritable dummyLong = null; - - @Override - protected void reduce(IntWritable key, java.lang.Iterable - values, org.apache.hadoop.mapreduce.Reducer.Context context) - throws IOException, InterruptedException { - String 
output = key.toString() + TAB; - Long sumid = 0l; - Long sumintnum5 = 0l; - Long sumintnum100 = 0l; - Long sumintnum = 0l; - Long sumlongnum = 0l; - Float sumfloatnum = 0.0f; - Double sumdoublenum = 0.0; - for (ArrayWritable value : values) { - sumid += value.id.get(); - sumintnum5 += value.intnum5.get(); - sumintnum100 += value.intnum100.get(); - sumintnum += value.intnum.get(); - sumlongnum += value.longnum.get(); - sumfloatnum += value.floatnum.get(); - sumdoublenum += value.doublenum.get(); - } - output += sumid + TAB; - output += sumintnum5 + TAB; - output += sumintnum100 + TAB; - output += sumintnum + TAB; - output += sumlongnum + TAB; - output += sumfloatnum + TAB; - output += sumdoublenum + TAB; - context.write(dummyLong, new Text(output)); - } - } - - public static void main(String[] args) throws Exception { - Configuration conf = new Configuration(); - args = new GenericOptionsParser(conf, args).getRemainingArgs(); - String[] otherArgs = new String[4]; - int j = 0; - for (int i = 0; i < args.length; i++) { - if (args[i].equals("-libjars")) { - // generic options parser doesn't seem to work! 
- conf.set("tmpjars", args[i + 1]); - i = i + 1; // skip it , the for loop will skip its value - } else { - otherArgs[j++] = args[i]; - } - } - if (otherArgs.length != 4) { - System.err.println("Usage: hadoop jar sumnumbers <-libjars hive-hcat jar>\n" + - "The argument controls the output delimiter.\n" + - "The hcat jar location should be specified as file://\n"); - System.exit(2); - } - String serverUri = otherArgs[0]; - String tableName = NUMBERS_TABLE_NAME; - String outputDir = otherArgs[1]; - String dbName = "default"; - - String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL); - if (principalID != null) - conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID); - Job job = new Job(conf, "sumnumbers"); - HCatInputFormat.setInput(job, InputJobInfo.create( - dbName, tableName, null)); - // initialize HCatOutputFormat - - job.setInputFormatClass(HCatInputFormat.class); - job.setOutputFormatClass(TextOutputFormat.class); - job.setJarByClass(SumNumbers.class); - job.setMapperClass(SumMapper.class); - job.setReducerClass(SumReducer.class); - job.setMapOutputKeyClass(IntWritable.class); - job.setMapOutputValueClass(ArrayWritable.class); - job.setOutputKeyClass(LongWritable.class); - job.setOutputValueClass(Text.class); - FileOutputFormat.setOutputPath(job, new Path(outputDir)); - System.exit(job.waitForCompletion(true) ? 
0 : 1); - } - - public static class ArrayWritable implements Writable { - - // though id is given as a Short by hcat, the map will emit it as an - // IntWritable so we can just sum in the reduce - IntWritable id; - - // though intnum5 is handed as a Byte by hcat, the map() will emit it as - // an IntWritable so we can just sum in the reduce - IntWritable intnum5; - - IntWritable intnum100; - IntWritable intnum; - LongWritable longnum; - FloatWritable floatnum; - DoubleWritable doublenum; - - /** - * - */ - public ArrayWritable() { - id = new IntWritable(); - intnum5 = new IntWritable(); - intnum100 = new IntWritable(); - intnum = new IntWritable(); - longnum = new LongWritable(); - floatnum = new FloatWritable(); - doublenum = new DoubleWritable(); - } - - - /** - * @param id - * @param intnum5 - * @param intnum100 - * @param intnum - * @param longnum - * @param floatnum - * @param doublenum - */ - public ArrayWritable(IntWritable id, IntWritable intnum5, - IntWritable intnum100, IntWritable intnum, LongWritable longnum, - FloatWritable floatnum, DoubleWritable doublenum) { - this.id = id; - this.intnum5 = intnum5; - this.intnum100 = intnum100; - this.intnum = intnum; - this.longnum = longnum; - this.floatnum = floatnum; - this.doublenum = doublenum; - } - - - @Override - public void readFields(DataInput in) throws IOException { - id.readFields(in); - intnum5.readFields(in); - intnum100.readFields(in); - intnum.readFields(in); - longnum.readFields(in); - floatnum.readFields(in); - doublenum.readFields(in); - } - - @Override - public void write(DataOutput out) throws IOException { - id.write(out); - intnum5.write(out); - intnum100.write(out); - intnum.write(out); - longnum.write(out); - floatnum.write(out); - doublenum.write(out); - - } - - } -} diff --git hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/TypeDataCheck.java hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/TypeDataCheck.java deleted file mode 100644 index 
7d7e384..0000000 --- hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/TypeDataCheck.java +++ /dev/null @@ -1,183 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.utils; - -import java.io.IOException; -import java.util.Arrays; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.Mapper; -import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; -import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; -import org.apache.hadoop.util.GenericOptionsParser; -import org.apache.hadoop.util.Tool; -import org.apache.hadoop.util.ToolRunner; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.apache.hcatalog.mapreduce.HCatInputFormat; -import org.apache.hcatalog.mapreduce.InputJobInfo; - -/** - * This is a map reduce test for testing hcat that checks that the columns - * handed by hcat have the right type and right values. 
It achieves the first - * objective by checking the type of the Objects representing the columns against - * the schema provided as a cmdline arg. It achieves the second objective by - * writing the data as Text to be compared against golden results. - * - * The schema specification consists of the types as given by "describe
" - * with each column's type separated from the next column's type by a '+' - * - * Can be used against "numbers" and "complex" tables. - * - * Usage: hadoop jar testudf.jar typedatacheck - * <-libjars hive-hcat jar> - The argument controls the output delimiter. - The hcat jar location should be specified as file:// - */ -public class TypeDataCheck implements Tool { - - static String SCHEMA_KEY = "schema"; - static String DELIM = "delim"; - private static Configuration conf = new Configuration(); - - public static class TypeDataCheckMapper - extends Mapper { - - Long dummykey = null; - String[] types; - String delim = "\u0001"; - - @Override - protected void setup(org.apache.hadoop.mapreduce.Mapper.Context context) - throws IOException, InterruptedException { - String typesStr = context.getConfiguration().get(SCHEMA_KEY); - delim = context.getConfiguration().get(DELIM); - if (delim.equals("tab")) { - delim = "\t"; - } else if (delim.equals("ctrla")) { - delim = "\u0001"; - } - types = typesStr.split("\\+"); - for (int i = 0; i < types.length; i++) { - types[i] = types[i].toLowerCase(); - } - - - } - - String check(HCatRecord r) throws IOException { - String s = ""; - for (int i = 0; i < r.size(); i++) { - s += Util.check(types[i], r.get(i)); - if (i != r.size() - 1) { - s += delim; - } - } - return s; - } - - @Override - protected void map(WritableComparable key, HCatRecord value, - org.apache.hadoop.mapreduce.Mapper.Context context) - throws IOException, InterruptedException { - context.write(dummykey, new Text(check(value))); - } - } - - public static void main(String[] args) throws Exception { - TypeDataCheck self = new TypeDataCheck(); - System.exit(ToolRunner.run(conf, self, args)); - } - - public int run(String[] args) { - try { - args = new GenericOptionsParser(conf, args).getRemainingArgs(); - String[] otherArgs = new String[5]; - int j = 0; - for (int i = 0; i < args.length; i++) { - if (args[i].equals("-libjars")) { - conf.set("tmpjars", args[i + 1]); - 
i = i + 1; // skip it , the for loop will skip its value - } else { - otherArgs[j++] = args[i]; - } - } - if (otherArgs.length != 5) { - System.err.println("Other args:" + Arrays.asList(otherArgs)); - System.err.println("Usage: hadoop jar testudf.jar typedatacheck " + - " " + - " <-libjars hive-hcat jar>\n" + - "The argument controls the output delimiter.\n" + - "The hcat jar location should be specified as file://\n"); - System.err.println(" The argument controls the output delimiter."); - System.exit(2); - } - String serverUri = otherArgs[0]; - String tableName = otherArgs[1]; - String schemaStr = otherArgs[2]; - String outputDir = otherArgs[3]; - String outputdelim = otherArgs[4]; - if (!outputdelim.equals("tab") && !outputdelim.equals("ctrla")) { - System.err.println("ERROR: Specify 'tab' or 'ctrla' for output delimiter"); - } - String dbName = "default"; - - String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL); - if (principalID != null) { - conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID); - } - Job job = new Job(conf, "typedatacheck"); - // initialize HCatInputFormat - HCatInputFormat.setInput(job, InputJobInfo.create( - dbName, tableName, null)); - HCatSchema s = HCatInputFormat.getTableSchema(job); - job.getConfiguration().set(SCHEMA_KEY, schemaStr); - job.getConfiguration().set(DELIM, outputdelim); - job.setInputFormatClass(HCatInputFormat.class); - job.setOutputFormatClass(TextOutputFormat.class); - job.setJarByClass(TypeDataCheck.class); - job.setMapperClass(TypeDataCheckMapper.class); - job.setNumReduceTasks(0); - job.setOutputKeyClass(Long.class); - job.setOutputValueClass(Text.class); - FileOutputFormat.setOutputPath(job, new Path(outputDir)); - System.exit(job.waitForCompletion(true) ? 
0 : 1); - return 0; - } catch (Exception e) { - throw new RuntimeException(e); - } - } - - @Override - public Configuration getConf() { - return conf; - } - - @Override - public void setConf(Configuration conf) { - TypeDataCheck.conf = conf; - } - -} diff --git hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/Util.java hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/Util.java deleted file mode 100644 index d98ed4e..0000000 --- hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/Util.java +++ /dev/null @@ -1,106 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.utils; - -import java.io.IOException; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; - -public class Util { - - static Map> typeMap = new HashMap>(); - - static { - typeMap.put("tinyint", Byte.class); - typeMap.put("smallint", Short.class); - typeMap.put("int", Integer.class); - typeMap.put("bigint", Long.class); - typeMap.put("float", Float.class); - typeMap.put("double", Double.class); - typeMap.put("string", String.class); - typeMap.put("boolean", Boolean.class); - typeMap.put("struct", List.class); - typeMap.put("map", Map.class); - typeMap.put("array>", List.class); - } - - public static void die(String expectedType, Object o) throws IOException { - throw new IOException("Expected " + expectedType + ", got " + - o.getClass().getName()); - } - - - public static String check(String type, Object o) throws IOException { - if (o == null) { - return "null"; - } - if (check(typeMap.get(type), o)) { - if (type.equals("map")) { - Map m = (Map) o; - check(m); - } else if (type.equals("array>")) { - List> listOfMaps = (List>) o; - for (Map m : listOfMaps) { - check(m); - } - } else if (type.equals("struct")) { - List l = (List) o; - if (!check(Integer.class, l.get(0)) || - !check(String.class, l.get(1)) || - !check(Double.class, l.get(2))) { - die("struct", l); - } - } - } else { - die(typeMap.get(type).getName(), o); - } - return o.toString(); - } - - /** - * @param m - * @throws IOException - */ - public static void check(Map m) throws IOException { - if (m == null) { - return; - } - for (Entry e : m.entrySet()) { - // just access key and value to ensure they are correct - if (!check(String.class, e.getKey())) { - die("String", e.getKey()); - } - if (!check(String.class, e.getValue())) { - die("String", e.getValue()); - } - } - - } - - public static boolean check(Class expected, Object actual) { - if (actual == null) { - return true; - } - return 
expected.isAssignableFrom(actual.getClass()); - } - -} diff --git hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/WriteJson.java hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/WriteJson.java deleted file mode 100644 index d25464f..0000000 --- hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/WriteJson.java +++ /dev/null @@ -1,118 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.utils; - -import java.io.IOException; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.conf.Configured; -import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.Mapper; -import org.apache.hadoop.util.GenericOptionsParser; -import org.apache.hadoop.util.Tool; -import org.apache.hadoop.util.ToolRunner; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.data.DefaultHCatRecord; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.apache.hcatalog.mapreduce.HCatInputFormat; -import org.apache.hcatalog.mapreduce.HCatOutputFormat; -import org.apache.hcatalog.mapreduce.InputJobInfo; -import org.apache.hcatalog.mapreduce.OutputJobInfo; - -/** - * This is a map reduce test for testing hcat which goes against the "numbers" - * table. It performs a group by on the first column and a SUM operation on the - * other columns. This is to simulate a typical operation in a map reduce - * program to test that hcat hands the right data to the map reduce program - * - * Usage: hadoop jar sumnumbers <-libjars hive-hcat - * jar> The argument controls the output delimiter The hcat jar - * location should be specified as file:// - */ -public class WriteJson extends Configured implements Tool { - - public static class Map extends - Mapper { - - String s; - Integer i; - Double d; - - @Override - protected void map( - WritableComparable key, - HCatRecord value, - org.apache.hadoop.mapreduce.Mapper.Context context) - throws IOException, InterruptedException { - s = value.get(0) == null ? null : (String) value.get(0); - i = value.get(1) == null ? null : (Integer) value.get(1); - d = value.get(2) == null ? 
null : (Double) value.get(2); - - HCatRecord record = new DefaultHCatRecord(5); - record.set(0, s); - record.set(1, i); - record.set(2, d); - - context.write(null, record); - - } - } - - public int run(String[] args) throws Exception { - Configuration conf = getConf(); - args = new GenericOptionsParser(conf, args).getRemainingArgs(); - - String serverUri = args[0]; - String inputTableName = args[1]; - String outputTableName = args[2]; - String dbName = null; - - String principalID = System - .getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL); - if (principalID != null) - conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID); - Job job = new Job(conf, "WriteJson"); - HCatInputFormat.setInput(job, InputJobInfo.create(dbName, - inputTableName, null)); - // initialize HCatOutputFormat - - job.setInputFormatClass(HCatInputFormat.class); - job.setJarByClass(WriteJson.class); - job.setMapperClass(Map.class); - job.setOutputKeyClass(WritableComparable.class); - job.setOutputValueClass(DefaultHCatRecord.class); - job.setNumReduceTasks(0); - HCatOutputFormat.setOutput(job, OutputJobInfo.create(dbName, - outputTableName, null)); - HCatSchema s = HCatInputFormat.getTableSchema(job); - System.err.println("INFO: output schema explicitly set for writing:" - + s); - HCatOutputFormat.setSchema(job, s); - job.setOutputFormatClass(HCatOutputFormat.class); - return (job.waitForCompletion(true) ? 
0 : 1); - } - - public static void main(String[] args) throws Exception { - int exitCode = ToolRunner.run(new WriteJson(), args); - System.exit(exitCode); - } -} diff --git hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/WriteRC.java hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/WriteRC.java deleted file mode 100644 index 736f104..0000000 --- hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/WriteRC.java +++ /dev/null @@ -1,120 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.utils; - -import java.io.IOException; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.conf.Configured; -import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.Mapper; -import org.apache.hadoop.util.GenericOptionsParser; -import org.apache.hadoop.util.Tool; -import org.apache.hadoop.util.ToolRunner; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.data.DefaultHCatRecord; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.apache.hcatalog.mapreduce.HCatInputFormat; -import org.apache.hcatalog.mapreduce.HCatOutputFormat; -import org.apache.hcatalog.mapreduce.InputJobInfo; -import org.apache.hcatalog.mapreduce.OutputJobInfo; - -/** - * This is a map reduce test for testing hcat which goes against the "numbers" - * table. It performs a group by on the first column and a SUM operation on the - * other columns. This is to simulate a typical operation in a map reduce - * program to test that hcat hands the right data to the map reduce program - * - * Usage: hadoop jar sumnumbers <-libjars hive-hcat - * jar> The argument controls the output delimiter The hcat jar - * location should be specified as file:// - */ -public class WriteRC extends Configured implements Tool { - - public static class Map extends - Mapper { - - String name; - Integer age; - Double gpa; - - @Override - protected void map( - WritableComparable key, - HCatRecord value, - org.apache.hadoop.mapreduce.Mapper.Context context) - throws IOException, InterruptedException { - name = value.get(0) == null ? null : (String) value.get(0); - age = value.get(1) == null ? null : (Integer) value.get(1); - gpa = value.get(2) == null ? 
null : (Double) value.get(2); - - if (gpa != null) gpa = Math.floor(gpa) + 0.1; - - HCatRecord record = new DefaultHCatRecord(5); - record.set(0, name); - record.set(1, age); - record.set(2, gpa); - - context.write(null, record); - - } - } - - public int run(String[] args) throws Exception { - Configuration conf = getConf(); - args = new GenericOptionsParser(conf, args).getRemainingArgs(); - - String serverUri = args[0]; - String inputTableName = args[1]; - String outputTableName = args[2]; - String dbName = null; - - String principalID = System - .getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL); - if (principalID != null) - conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID); - Job job = new Job(conf, "WriteRC"); - HCatInputFormat.setInput(job, InputJobInfo.create(dbName, - inputTableName, null)); - // initialize HCatOutputFormat - - job.setInputFormatClass(HCatInputFormat.class); - job.setJarByClass(WriteRC.class); - job.setMapperClass(Map.class); - job.setOutputKeyClass(WritableComparable.class); - job.setOutputValueClass(DefaultHCatRecord.class); - job.setNumReduceTasks(0); - HCatOutputFormat.setOutput(job, OutputJobInfo.create(dbName, - outputTableName, null)); - HCatSchema s = HCatInputFormat.getTableSchema(job); - System.err.println("INFO: output schema explicitly set for writing:" - + s); - HCatOutputFormat.setSchema(job, s); - job.setOutputFormatClass(HCatOutputFormat.class); - return (job.waitForCompletion(true) ? 
0 : 1); - } - - public static void main(String[] args) throws Exception { - int exitCode = ToolRunner.run(new WriteRC(), args); - System.exit(exitCode); - } -} diff --git hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/WriteText.java hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/WriteText.java deleted file mode 100644 index 6f170fa..0000000 --- hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/WriteText.java +++ /dev/null @@ -1,130 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.utils; - -import java.io.IOException; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.conf.Configured; -import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.Mapper; -import org.apache.hadoop.util.GenericOptionsParser; -import org.apache.hadoop.util.Tool; -import org.apache.hadoop.util.ToolRunner; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.data.DefaultHCatRecord; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.apache.hcatalog.mapreduce.HCatInputFormat; -import org.apache.hcatalog.mapreduce.HCatOutputFormat; -import org.apache.hcatalog.mapreduce.InputJobInfo; -import org.apache.hcatalog.mapreduce.OutputJobInfo; - -/** - * This is a map reduce test for testing hcat which goes against the "numbers" - * table. It performs a group by on the first column and a SUM operation on the - * other columns. 
This is to simulate a typical operation in a map reduce - * program to test that hcat hands the right data to the map reduce program - * - * Usage: hadoop jar sumnumbers <-libjars hive-hcat - * jar> The argument controls the output delimiter The hcat jar - * location should be specified as file:// - */ -public class WriteText extends Configured implements Tool { - - public static class Map extends - Mapper { - - byte t; - short si; - int i; - long b; - float f; - double d; - String s; - - @Override - protected void map( - WritableComparable key, - HCatRecord value, - org.apache.hadoop.mapreduce.Mapper.Context context) - throws IOException, InterruptedException { - t = (Byte) value.get(0); - si = (Short) value.get(1); - i = (Integer) value.get(2); - b = (Long) value.get(3); - f = (Float) value.get(4); - d = (Double) value.get(5); - s = (String) value.get(6); - - HCatRecord record = new DefaultHCatRecord(7); - record.set(0, t); - record.set(1, si); - record.set(2, i); - record.set(3, b); - record.set(4, f); - record.set(5, d); - record.set(6, s); - - context.write(null, record); - - } - } - - public int run(String[] args) throws Exception { - Configuration conf = getConf(); - args = new GenericOptionsParser(conf, args).getRemainingArgs(); - - String serverUri = args[0]; - String inputTableName = args[1]; - String outputTableName = args[2]; - String dbName = null; - - String principalID = System - .getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL); - if (principalID != null) - conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID); - Job job = new Job(conf, "WriteText"); - HCatInputFormat.setInput(job, InputJobInfo.create(dbName, - inputTableName, null)); - // initialize HCatOutputFormat - - job.setInputFormatClass(HCatInputFormat.class); - job.setJarByClass(WriteText.class); - job.setMapperClass(Map.class); - job.setOutputKeyClass(WritableComparable.class); - job.setOutputValueClass(DefaultHCatRecord.class); - job.setNumReduceTasks(0); - 
HCatOutputFormat.setOutput(job, OutputJobInfo.create(dbName, - outputTableName, null)); - HCatSchema s = HCatInputFormat.getTableSchema(job); - System.err.println("INFO: output schema explicitly set for writing:" - + s); - HCatOutputFormat.setSchema(job, s); - job.setOutputFormatClass(HCatOutputFormat.class); - return (job.waitForCompletion(true) ? 0 : 1); - } - - public static void main(String[] args) throws Exception { - int exitCode = ToolRunner.run(new WriteText(), args); - System.exit(exitCode); - } -} diff --git hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/WriteTextPartitioned.java hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/WriteTextPartitioned.java deleted file mode 100644 index eff066e..0000000 --- hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/WriteTextPartitioned.java +++ /dev/null @@ -1,133 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.utils; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.conf.Configured; -import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.Mapper; -import org.apache.hadoop.util.GenericOptionsParser; -import org.apache.hadoop.util.Tool; -import org.apache.hadoop.util.ToolRunner; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.data.DefaultHCatRecord; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.apache.hcatalog.data.schema.HCatFieldSchema; -import org.apache.hcatalog.mapreduce.HCatInputFormat; -import org.apache.hcatalog.mapreduce.HCatOutputFormat; -import org.apache.hcatalog.mapreduce.InputJobInfo; -import org.apache.hcatalog.mapreduce.OutputJobInfo; - -/** - * This is a map reduce test for testing hcat writing to partitioned tables. - * table. It performs a group by on the first column and a SUM operation on the - * other columns. This is to simulate a typical operation in a map reduce - * program to test that hcat hands the right data to the map reduce program - * - * Usage: hadoop jar org.apache.hcatalog.utils.HBaseReadWrite -libjars - * <hcat_jar> * <serveruri> <input_tablename> <output_tablename> [filter] - * If filter is given it will be provided as the partition to write to. 
- */ -public class WriteTextPartitioned extends Configured implements Tool { - - static String filter = null; - - public static class Map extends - Mapper { - - @Override - protected void map( - WritableComparable key, - HCatRecord value, - org.apache.hadoop.mapreduce.Mapper.Context context) - throws IOException, InterruptedException { - String name = (String) value.get(0); - int age = (Integer) value.get(1); - String ds = (String) value.get(3); - - HCatRecord record = (filter == null ? new DefaultHCatRecord(3) : new DefaultHCatRecord(2)); - record.set(0, name); - record.set(1, age); - if (filter == null) record.set(2, ds); - - context.write(null, record); - - } - } - - public int run(String[] args) throws Exception { - Configuration conf = getConf(); - args = new GenericOptionsParser(conf, args).getRemainingArgs(); - - String serverUri = args[0]; - String inputTableName = args[1]; - String outputTableName = args[2]; - if (args.length > 3) filter = args[3]; - String dbName = null; - - String principalID = System - .getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL); - if (principalID != null) - conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID); - Job job = new Job(conf, "WriteTextPartitioned"); - HCatInputFormat.setInput(job, InputJobInfo.create(dbName, - inputTableName, filter)); - // initialize HCatOutputFormat - - job.setInputFormatClass(HCatInputFormat.class); - job.setJarByClass(WriteTextPartitioned.class); - job.setMapperClass(Map.class); - job.setOutputKeyClass(WritableComparable.class); - job.setOutputValueClass(DefaultHCatRecord.class); - job.setNumReduceTasks(0); - - java.util.Map partitionVals = null; - if (filter != null) { - String[] s = filter.split("="); - String val = s[1].replace('"', ' ').trim(); - partitionVals = new HashMap(1); - partitionVals.put(s[0], val); - } - HCatOutputFormat.setOutput(job, OutputJobInfo.create(dbName, - outputTableName, partitionVals)); - HCatSchema s = HCatInputFormat.getTableSchema(job); - // Build the 
schema for this table, which is slightly different than the - // schema for the input table - List fss = new ArrayList(3); - fss.add(s.get(0)); - fss.add(s.get(1)); - fss.add(s.get(3)); - HCatOutputFormat.setSchema(job, new HCatSchema(fss)); - job.setOutputFormatClass(HCatOutputFormat.class); - return (job.waitForCompletion(true) ? 0 : 1); - } - - public static void main(String[] args) throws Exception { - int exitCode = ToolRunner.run(new WriteTextPartitioned(), args); - System.exit(exitCode); - } -} diff --git hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/DataReaderMaster.java hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/DataReaderMaster.java new file mode 100644 index 0000000..437bee3 --- /dev/null +++ hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/DataReaderMaster.java @@ -0,0 +1,71 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hive.hcatalog.utils; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.FileReader; +import java.io.IOException; +import java.io.ObjectOutputStream; +import java.util.HashMap; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Properties; + +import org.apache.hive.hcatalog.common.HCatException; +import org.apache.hive.hcatalog.data.transfer.DataTransferFactory; +import org.apache.hive.hcatalog.data.transfer.HCatReader; +import org.apache.hive.hcatalog.data.transfer.ReadEntity; +import org.apache.hive.hcatalog.data.transfer.ReaderContext; + +public class DataReaderMaster { + + public static void main(String[] args) throws FileNotFoundException, IOException { + + // This config contains all the configuration that master node wants to provide + // to the HCatalog. + Properties externalConfigs = new Properties(); + externalConfigs.load(new FileReader(args[0])); + Map config = new HashMap(); + + for (Entry kv : externalConfigs.entrySet()) { + config.put((String) kv.getKey(), (String) kv.getValue()); + } + + // This piece of code runs in master node and gets necessary context. + ReaderContext context = runsInMaster(config); + + ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(new File(args[1]))); + oos.writeObject(context); + oos.flush(); + oos.close(); + // Master node will serialize readercontext and will make it available at slaves. 
+ } + + private static ReaderContext runsInMaster(Map config) throws HCatException { + + ReadEntity.Builder builder = new ReadEntity.Builder(); + ReadEntity entity = builder.withTable(config.get("table")).build(); + HCatReader reader = DataTransferFactory.getHCatReader(entity, config); + ReaderContext cntxt = reader.prepareRead(); + return cntxt; + } +} diff --git hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/DataReaderSlave.java hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/DataReaderSlave.java new file mode 100644 index 0000000..688b736 --- /dev/null +++ hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/DataReaderSlave.java @@ -0,0 +1,62 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.hive.hcatalog.utils; + +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileWriter; +import java.io.IOException; +import java.io.ObjectInputStream; +import java.util.Iterator; +import java.util.List; + +import org.apache.hadoop.mapreduce.InputSplit; +import org.apache.hive.hcatalog.data.HCatRecord; +import org.apache.hive.hcatalog.data.transfer.DataTransferFactory; +import org.apache.hive.hcatalog.data.transfer.HCatReader; +import org.apache.hive.hcatalog.data.transfer.ReaderContext; + +public class DataReaderSlave { + + public static void main(String[] args) throws IOException, ClassNotFoundException { + + ObjectInputStream ois = new ObjectInputStream(new FileInputStream(new File(args[0]))); + ReaderContext cntxt = (ReaderContext) ois.readObject(); + ois.close(); + + String[] inpSlitsToRead = args[1].split(","); + List splits = cntxt.getSplits(); + + for (int i = 0; i < inpSlitsToRead.length; i++) { + InputSplit split = splits.get(Integer.parseInt(inpSlitsToRead[i])); + HCatReader reader = DataTransferFactory.getHCatReader(split, cntxt.getConf()); + Iterator itr = reader.read(); + File f = new File(args[2] + "-" + i); + f.delete(); + BufferedWriter outFile = new BufferedWriter(new FileWriter(f)); + while (itr.hasNext()) { + String rec = itr.next().toString().replaceFirst("\\s+$", ""); + System.err.println(rec); + outFile.write(rec + "\n"); + } + outFile.close(); + } + } +} diff --git hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/DataWriterMaster.java hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/DataWriterMaster.java new file mode 100644 index 0000000..0b92035 --- /dev/null +++ hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/DataWriterMaster.java @@ -0,0 +1,96 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hive.hcatalog.utils;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Map.Entry;
+
+import org.apache.hive.hcatalog.common.HCatException;
+import org.apache.hive.hcatalog.data.transfer.DataTransferFactory;
+import org.apache.hive.hcatalog.data.transfer.HCatWriter;
+import org.apache.hive.hcatalog.data.transfer.WriteEntity;
+import org.apache.hive.hcatalog.data.transfer.WriterContext;
+
+/**
+ * Master side of the data-transfer write test.
+ *
+ * args[0] is a properties file with the configuration to hand to HCatalog;
+ * its "table" entry names the table to write. args[1] is the file used to
+ * exchange the serialized {@link WriterContext}. Without a third argument the
+ * context is prepared and written to args[1]; with a third argument of
+ * "commit" the context is read back from args[1] and committed.
+ */
+public class DataWriterMaster {
+
+    public static void main(String[] args) throws FileNotFoundException, IOException, ClassNotFoundException {
+
+        // This config contains all the configuration that master node wants to provide
+        // to the HCatalog.
+        Map<String, String> config = loadConfig(args[0]);
+
+        if (args.length == 3 && "commit".equalsIgnoreCase(args[2])) {
+            // Then, master commits if everything goes well.
+            commit(config, true, readContext(args[1]));
+            System.exit(0);
+        }
+        // This piece of code runs in master node and gets necessary context.
+        WriterContext cntxt = runsInMaster(config);
+
+        // Master node will serialize writercontext and will make it available at slaves.
+        File f = new File(args[1]);
+        f.delete();
+        ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(f));
+        try {
+            oos.writeObject(cntxt);
+            oos.flush();
+        } finally {
+            oos.close();
+        }
+    }
+
+    /** Loads the properties file and echoes every entry to stderr. */
+    private static Map<String, String> loadConfig(String path) throws IOException {
+        Properties externalConfigs = new Properties();
+        FileReader in = new FileReader(path);
+        try {
+            externalConfigs.load(in);
+        } finally {
+            in.close();
+        }
+
+        Map<String, String> config = new HashMap<String, String>();
+        for (Entry<Object, Object> kv : externalConfigs.entrySet()) {
+            System.err.println("k: " + kv.getKey() + "\t v: " + kv.getValue());
+            config.put((String) kv.getKey(), (String) kv.getValue());
+        }
+        return config;
+    }
+
+    /** Deserializes the WriterContext stored in the given file. */
+    private static WriterContext readContext(String path) throws IOException, ClassNotFoundException {
+        ObjectInputStream ois = new ObjectInputStream(new FileInputStream(new File(path)));
+        try {
+            return (WriterContext) ois.readObject();
+        } finally {
+            ois.close();
+        }
+    }
+
+    /** Prepares the write for the configured table and returns its context. */
+    private static WriterContext runsInMaster(Map<String, String> config) throws HCatException {
+
+        WriteEntity.Builder builder = new WriteEntity.Builder();
+        WriteEntity entity = builder.withTable(config.get("table")).build();
+        HCatWriter writer = DataTransferFactory.getHCatWriter(entity, config);
+        WriterContext info = writer.prepareWrite();
+        return info;
+    }
+
+    /** Commits the write when status is true, aborts it otherwise. */
+    private static void commit(Map<String, String> config, boolean status, WriterContext cntxt) throws HCatException {
+
+        WriteEntity.Builder builder = new WriteEntity.Builder();
+        WriteEntity entity = builder.withTable(config.get("table")).build();
+        HCatWriter writer = DataTransferFactory.getHCatWriter(entity, config);
+        if (status) {
+            writer.commit(cntxt);
+        } else {
+            writer.abort(cntxt);
+        }
+    }
+}
diff --git hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/DataWriterSlave.java hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/DataWriterSlave.java
new file mode 100644
index 0000000..b57113c
--- /dev/null
+++ hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/DataWriterSlave.java
@@ -0,0 +1,130 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hive.hcatalog.utils;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.ObjectInputStream;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.NoSuchElementException;
+
+import org.apache.hive.hcatalog.data.DefaultHCatRecord;
+import org.apache.hive.hcatalog.data.HCatRecord;
+import org.apache.hive.hcatalog.data.transfer.DataTransferFactory;
+import org.apache.hive.hcatalog.data.transfer.HCatWriter;
+import org.apache.hive.hcatalog.data.transfer.WriterContext;
+
+/**
+ * Slave side of the data-transfer write test: restores the WriterContext
+ * serialized in the file args[0] and writes the records parsed from the
+ * tab-separated text file args[1].
+ */
+public class DataWriterSlave {
+
+    public static void main(String[] args) throws FileNotFoundException, IOException, ClassNotFoundException {
+
+        WriterContext cntxt;
+        ObjectInputStream ois = new ObjectInputStream(new FileInputStream(args[0]));
+        try {
+            cntxt = (WriterContext) ois.readObject();
+        } finally {
+            ois.close();
+        }
+
+        HCatWriter writer = DataTransferFactory.getHCatWriter(cntxt);
+        HCatRecordItr records = new HCatRecordItr(args[1]);
+        try {
+            writer.write(records);
+        } finally {
+            records.close();
+        }
+    }
+
+    /**
+     * Turns each line of a text file into a three-column record:
+     * string, int, double (tab separated).
+     */
+    private static class HCatRecordItr implements Iterator<HCatRecord> {
+
+        private final BufferedReader reader;
+        // Line read ahead by hasNext() that next() has not handed out yet.
+        private String curLine;
+        // Set once readLine() has returned null.
+        private boolean exhausted;
+
+        public HCatRecordItr(String fileName) throws FileNotFoundException {
+            reader = new BufferedReader(new FileReader(new File(fileName)));
+        }
+
+        /**
+         * Reads ahead at most one line, so repeated calls do not skip records.
+         */
+        @Override
+        public boolean hasNext() {
+            if (curLine == null && !exhausted) {
+                try {
+                    curLine = reader.readLine();
+                } catch (IOException e) {
+                    // Fail loudly rather than silently truncating or repeating input.
+                    throw new IllegalStateException("Failed to read input records", e);
+                }
+                if (curLine == null) {
+                    exhausted = true;
+                    close();
+                }
+            }
+            return curLine != null;
+        }
+
+        @Override
+        public HCatRecord next() {
+            if (!hasNext()) {
+                throw new NoSuchElementException();
+            }
+            String[] fields = curLine.split("\t");
+            curLine = null; // consumed; the next hasNext() reads a fresh line
+            List<Object> data = new ArrayList<Object>(3);
+            data.add(fields[0]);
+            data.add(Integer.parseInt(fields[1]));
+            data.add(Double.parseDouble(fields[2]));
+            return new DefaultHCatRecord(data);
+        }
+
+        @Override
+        public void remove() {
+            throw new UnsupportedOperationException("remove");
+        }
+
+        /** Closes the underlying file reader. */
+        void close() {
+            try {
+                reader.close();
+            } catch (IOException e) {
+                // Nothing useful can be done if close itself fails.
+                e.printStackTrace();
+            }
+        }
+    }
+}
diff --git hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/GroupByAge.java hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/GroupByAge.java
new file mode 100644
index 0000000..78c0811
--- /dev/null
+++ hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/GroupByAge.java
@@ -0,0 +1,134 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hive.hcatalog.utils;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.util.GenericOptionsParser;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.hive.hcatalog.common.HCatConstants;
+import org.apache.hive.hcatalog.data.DefaultHCatRecord;
+import org.apache.hive.hcatalog.data.HCatRecord;
+import org.apache.hive.hcatalog.data.schema.HCatSchema;
+import org.apache.hive.hcatalog.mapreduce.HCatInputFormat;
+import org.apache.hive.hcatalog.mapreduce.HCatOutputFormat;
+import org.apache.hive.hcatalog.mapreduce.InputJobInfo;
+import org.apache.hive.hcatalog.mapreduce.OutputJobInfo;
+
+/**
+ * Map/reduce test for HCatalog: reads the input table, groups the rows by the
+ * integer in column 1 (the age) and writes one (age, row count) record per
+ * group to the output table.
+ *
+ * Arguments, after the generic Hadoop options: serveruri, input table name,
+ * output table name. The server URI is read positionally but not used.
+ */
+public class GroupByAge extends Configured implements Tool {
+
+    /** Emits (age, 1) for every input row. */
+    public static class Map extends
+        Mapper<WritableComparable, HCatRecord, IntWritable, IntWritable> {
+
+        @Override
+        protected void map(WritableComparable key, HCatRecord value, Context context)
+            throws IOException, InterruptedException {
+            int age = (Integer) value.get(1);
+            context.write(new IntWritable(age), new IntWritable(1));
+        }
+    }
+
+    /** Counts the values seen for each age and writes (age, count). */
+    public static class Reduce extends
+        Reducer<IntWritable, IntWritable, WritableComparable, HCatRecord> {
+
+        @Override
+        protected void reduce(IntWritable key, Iterable<IntWritable> values, Context context)
+            throws IOException, InterruptedException {
+            // Count the values rather than adding them up.
+            int sum = 0;
+            Iterator<IntWritable> iter = values.iterator();
+            while (iter.hasNext()) {
+                sum++;
+                iter.next();
+            }
+            HCatRecord record = new DefaultHCatRecord(2);
+            record.set(0, key.get());
+            record.set(1, sum);
+
+            context.write(null, record);
+        }
+    }
+
+    public int run(String[] args) throws Exception {
+        Configuration conf = getConf();
+        args = new GenericOptionsParser(conf, args).getRemainingArgs();
+        if (args.length < 3) {
+            System.err.println("Usage: GroupByAge <serveruri> <input table> <output table>");
+            return 2;
+        }
+
+        // args[0] is the server URI; it is not needed to configure the job.
+        String inputTableName = args[1];
+        String outputTableName = args[2];
+        String dbName = null;
+
+        String principalID = System
+            .getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL);
+        if (principalID != null) {
+            conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID);
+        }
+        Job job = new Job(conf, "GroupByAge");
+        HCatInputFormat.setInput(job, InputJobInfo.create(dbName,
+            inputTableName, null));
+
+        job.setInputFormatClass(HCatInputFormat.class);
+        job.setJarByClass(GroupByAge.class);
+        job.setMapperClass(Map.class);
+        job.setReducerClass(Reduce.class);
+        job.setMapOutputKeyClass(IntWritable.class);
+        job.setMapOutputValueClass(IntWritable.class);
+        job.setOutputKeyClass(WritableComparable.class);
+        job.setOutputValueClass(DefaultHCatRecord.class);
+
+        // initialize HCatOutputFormat
+        HCatOutputFormat.setOutput(job, OutputJobInfo.create(dbName,
+            outputTableName, null));
+        HCatSchema s = HCatOutputFormat.getTableSchema(job);
+        System.err.println("INFO: output schema explicitly set for writing:"
+            + s);
+        HCatOutputFormat.setSchema(job, s);
+        job.setOutputFormatClass(HCatOutputFormat.class);
+        return (job.waitForCompletion(true) ? 0 : 1);
+    }
+
+    public static void main(String[] args) throws Exception {
+        int exitCode = ToolRunner.run(new GroupByAge(), args);
+        System.exit(exitCode);
+    }
+}
diff --git hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/HBaseReadWrite.java hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/HBaseReadWrite.java
new file mode 100644
index 0000000..a0808d3
--- /dev/null
+++ hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/HBaseReadWrite.java
@@ -0,0 +1,192 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hive.hcatalog.utils;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
+import org.apache.hadoop.util.GenericOptionsParser;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.hive.hcatalog.common.HCatConstants;
+import org.apache.hive.hcatalog.data.DefaultHCatRecord;
+import org.apache.hive.hcatalog.data.HCatRecord;
+import org.apache.hive.hcatalog.mapreduce.HCatInputFormat;
+import org.apache.hive.hcatalog.mapreduce.HCatOutputFormat;
+import org.apache.hive.hcatalog.mapreduce.InputJobInfo;
+import org.apache.hive.hcatalog.mapreduce.OutputJobInfo;
+
+/**
+ * Map/reduce test that round-trips data through an HCatalog table, with
+ * "hcat.hbase.output.bulkMode" set to false.
+ *
+ * The first job reads tab-separated text lines (name, age, gpa), sums the gpa
+ * per name and writes (name, sum as a string) to the table. The second job
+ * reads the table back and writes name and gpa as text to the output dir.
+ *
+ * Arguments, after the generic Hadoop options: serveruri, input dir, table
+ * name, output dir. The server URI is read positionally but not used.
+ * run() returns 0 on success, 1 if the write job fails, 2 if the read job fails.
+ */
+public class HBaseReadWrite extends Configured implements Tool {
+
+    /** Keys every input line by its first tab-separated field. */
+    public static class HBaseWriteMap extends
+        Mapper<LongWritable, Text, Text, Text> {
+
+        @Override
+        protected void map(LongWritable key, Text value, Context context)
+            throws IOException, InterruptedException {
+            String[] tokens = value.toString().split("\t");
+            context.write(new Text(tokens[0]), value);
+        }
+    }
+
+
+    /** Sums the third field (gpa) of all lines that share a name. */
+    public static class HBaseWriteReduce extends
+        Reducer<Text, Text, WritableComparable, HCatRecord> {
+
+        @Override
+        protected void reduce(Text key, Iterable<Text> values, Context context)
+            throws IOException, InterruptedException {
+            String name = key.toString();
+            double sum = 0;
+            for (Text value : values) {
+                String[] tokens = value.toString().split("\t");
+                name = tokens[0];
+                sum += Double.parseDouble(tokens[2]);
+            }
+
+            HCatRecord record = new DefaultHCatRecord(2);
+            record.set(0, name);
+            record.set(1, Double.toString(sum));
+
+            context.write(null, record);
+        }
+    }
+
+    /** Emits columns 0 and 1 of every table row as text. */
+    public static class HBaseReadMap extends
+        Mapper<WritableComparable, HCatRecord, Text, Text> {
+
+        @Override
+        protected void map(WritableComparable key, HCatRecord value, Context context)
+            throws IOException, InterruptedException {
+            String name = (String) value.get(0);
+            String gpa = (String) value.get(1);
+            context.write(new Text(name), new Text(gpa));
+        }
+    }
+
+
+    public int run(String[] args) throws Exception {
+        Configuration conf = getConf();
+        args = new GenericOptionsParser(conf, args).getRemainingArgs();
+        if (args.length < 4) {
+            System.err.println(
+                "Usage: HBaseReadWrite <serveruri> <input dir> <table name> <output dir>");
+            return 3;
+        }
+
+        // args[0] is the server URI; it is not needed to configure the jobs.
+        String inputDir = args[1];
+        String tableName = args[2];
+        String outputDir = args[3];
+        String dbName = null;
+
+        String principalID = System
+            .getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL);
+        if (principalID != null) {
+            conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID);
+        }
+        conf.set("hcat.hbase.output.bulkMode", "false");
+
+        // Job 1: text files -> table.
+        Job job = new Job(conf, "HBaseWrite");
+        FileInputFormat.setInputPaths(job, inputDir);
+
+        job.setInputFormatClass(TextInputFormat.class);
+        job.setOutputFormatClass(HCatOutputFormat.class);
+        job.setJarByClass(HBaseReadWrite.class);
+        job.setMapperClass(HBaseWriteMap.class);
+        job.setMapOutputKeyClass(Text.class);
+        job.setMapOutputValueClass(Text.class);
+        job.setReducerClass(HBaseWriteReduce.class);
+        job.setOutputKeyClass(WritableComparable.class);
+        job.setOutputValueClass(DefaultHCatRecord.class);
+        HCatOutputFormat.setOutput(job, OutputJobInfo.create(dbName,
+            tableName, null));
+
+        boolean succ = job.waitForCompletion(true);
+
+        if (!succ) return 1;
+
+        // Job 2: table -> text files (map only).
+        job = new Job(conf, "HBaseRead");
+        HCatInputFormat.setInput(job, InputJobInfo.create(dbName, tableName,
+            null));
+
+        job.setInputFormatClass(HCatInputFormat.class);
+        job.setOutputFormatClass(TextOutputFormat.class);
+        job.setJarByClass(HBaseReadWrite.class);
+        job.setMapperClass(HBaseReadMap.class);
+        job.setOutputKeyClass(Text.class);
+        job.setOutputValueClass(Text.class);
+        job.setNumReduceTasks(0);
+        TextOutputFormat.setOutputPath(job, new Path(outputDir));
+
+        succ = job.waitForCompletion(true);
+
+        if (!succ) return 2;
+
+        return 0;
+    }
+
+    public static void main(String[] args) throws Exception {
+        int exitCode = ToolRunner.run(new HBaseReadWrite(), args);
+        System.exit(exitCode);
+    }
+}
+
diff --git hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/HCatTestDriver.java hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/HCatTestDriver.java
new file mode 100644
index 0000000..21ffeb0
--- /dev/null
+++ hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/HCatTestDriver.java
@@ -0,0 +1,72 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hive.hcatalog.utils;
+
+import org.apache.hadoop.util.ProgramDriver;
+
+/**
+ * Registers the HCatalog e2e map/reduce test programs with a
+ * {@link ProgramDriver} under short names and hands the command line to it.
+ */
+public class HCatTestDriver {
+
+    private static final String TYPE_DATA_CHECK_HELP =
+        "A map/reduce program that checks the type of each field and"
+            + " outputs the entire table (to test hcat).";
+
+    private static final String SUM_NUMBERS_HELP =
+        "A map/reduce program that performs a group by on the first column and a "
+            + "SUM operation on the other columns of the \"numbers\" table.";
+
+    private static final String STORE_NUMBERS_HELP =
+        "A map/reduce program that "
+            + "reads from the \"numbers\" table and adds 10 to each fields and writes "
+            + "to the \"numbers_partitioned\" table into the datestamp=20100101 "
+            + "partition OR the \"numbers_empty_initially\" table based on a "
+            + "cmdline arg";
+
+    private static final String STORE_COMPLEX_HELP =
+        "A map/reduce program that "
+            + "reads from the \"complex\" table and stores as-is into the "
+            + "\"complex_empty_initially\" table.";
+
+    private static final String STORE_DEMO_HELP = "demo prog.";
+
+    public static void main(String argv[]) {
+        ProgramDriver programs = new ProgramDriver();
+        int status;
+        try {
+            programs.addClass("typedatacheck", TypeDataCheck.class, TYPE_DATA_CHECK_HELP);
+            programs.addClass("sumnumbers", SumNumbers.class, SUM_NUMBERS_HELP);
+            programs.addClass("storenumbers", StoreNumbers.class, STORE_NUMBERS_HELP);
+            programs.addClass("storecomplex", StoreComplex.class, STORE_COMPLEX_HELP);
+            programs.addClass("storedemo", StoreDemo.class, STORE_DEMO_HELP);
+            programs.driver(argv);
+            // Reaching this point means the driver returned normally.
+            status = 0;
+        } catch (Throwable failure) {
+            failure.printStackTrace();
+            status = -1;
+        }
+
+        System.exit(status);
+    }
+}
+
diff --git hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/HCatTypeCheck.java hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/HCatTypeCheck.java
new file mode 100644
index 0000000..3435ae6
--- /dev/null
+++ hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/HCatTypeCheck.java
@@ -0,0 +1,151 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hive.hcatalog.utils;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import org.apache.pig.EvalFunc;
+import org.apache.pig.data.DataBag;
+import org.apache.pig.data.DataType;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.impl.logicalLayer.schema.Schema;
+import org.apache.pig.impl.util.Utils;
+
+/**
+ * This UDF can be used to check that a tuple presented by HCatLoader has the
+ * right types for the fields
+ *
+ * Usage is :
+ *
+ * register testudf.jar;
+ * a = load 'numbers' using HCatLoader(...);
+ * b = foreach a generate HCatTypeCheck('intnum1000:int,id:int,intnum5:int,intnum100:int,intnum:int,longnum:long,floatnum:float,doublenum:double', *);
+ * store b into 'output';
+ *
+ * The schema string (the first argument to the UDF) is of the form one would provide in a
+ * pig load statement.
+ *
+ * The output should only contain the value '1' in all rows. (This UDF returns
+ * the integer value 1 if all fields have the right type, else throws IOException)
+ *
+ */
+public class HCatTypeCheck extends EvalFunc<Integer> {
+
+    /** Expected Java class for each Pig type code this UDF can verify. */
+    static HashMap<Byte, Class<?>> typeMap = new HashMap<Byte, Class<?>>();
+
+    static {
+        typeMap.put(DataType.INTEGER, Integer.class);
+        typeMap.put(DataType.LONG, Long.class);
+        typeMap.put(DataType.FLOAT, Float.class);
+        typeMap.put(DataType.DOUBLE, Double.class);
+        typeMap.put(DataType.CHARARRAY, String.class);
+        typeMap.put(DataType.TUPLE, Tuple.class);
+        typeMap.put(DataType.MAP, Map.class);
+        typeMap.put(DataType.BAG, DataBag.class);
+    }
+
+    /**
+     * @param input field 0 is the schema string, the remaining fields are the
+     *              values to check against it, in schema order
+     * @return 1 when every field matches
+     * @throws IOException if the schema cannot be parsed or a field has the
+     *                     wrong type
+     */
+    @Override
+    public Integer exec(Tuple input) throws IOException {
+        String schemaStr = (String) input.get(0);
+        Schema s;
+        try {
+            s = getSchemaFromString(schemaStr);
+        } catch (Exception e) {
+            throw new IOException(e);
+        }
+        for (int i = 0; i < s.size(); i++) {
+            // input.get(i + 1) since input.get(0) is the schema
+            check(s.getField(i).type, input.get(i + 1));
+        }
+        return 1;
+    }
+
+
+    private void die(String expectedType, Object o) throws IOException {
+        throw new IOException("Expected " + expectedType + ", got " +
+            o.getClass().getName());
+    }
+
+
+    /**
+     * Checks one value against a Pig type code. Maps must hold string keys
+     * and values, bags must hold tuples whose first field is such a map, and
+     * tuples must be (int, string, double). Null values always pass.
+     */
+    private String check(Byte type, Object o) throws IOException {
+        if (o == null) {
+            return "";
+        }
+        Class<?> expected = typeMap.get(type);
+        if (expected == null) {
+            // Report the unmapped type instead of failing with a bare NPE.
+            throw new IOException("No type check is defined for Pig type code " + type);
+        }
+        if (!check(expected, o)) {
+            die(expected.getName(), o);
+        }
+
+        if (type.equals(DataType.MAP)) {
+            check((Map<?, ?>) o);
+        } else if (type.equals(DataType.BAG)) {
+            for (Tuple tuple : (DataBag) o) {
+                check((Map<?, ?>) tuple.get(0));
+            }
+        } else if (type.equals(DataType.TUPLE)) {
+            Tuple t = (Tuple) o;
+            if (!check(Integer.class, t.get(0)) ||
+                !check(String.class, t.get(1)) ||
+                !check(Double.class, t.get(2))) {
+                die("t:tuple(num:int,str:string,dbl:double)", t);
+            }
+        }
+        return o.toString();
+    }
+
+    /**
+     * Verifies that every key and value of the map is a String (or null).
+     *
+     * @param m the map to inspect
+     * @throws IOException if a key or value is not a String
+     */
+    private void check(Map<?, ?> m) throws IOException {
+        // Wildcard types keep the entries untyped so the check below reports
+        // the mismatch instead of a ClassCastException.
+        for (Entry<?, ?> e : m.entrySet()) {
+            if (!check(String.class, e.getKey())) {
+                die("String", e.getKey());
+            }
+            if (!check(String.class, e.getValue())) {
+                die("String", e.getValue());
+            }
+        }
+
+    }
+
+    /** Null is accepted for any expected type. */
+    private boolean check(Class<?> expected, Object actual) {
+        if (actual == null) {
+            return true;
+        }
+        return expected.isAssignableFrom(actual.getClass());
+    }
+
+    Schema getSchemaFromString(String schemaString) throws Exception {
+        return Utils.getSchemaFromString(schemaString);
+    }
+
+}
diff --git hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/HCatTypeCheckHive.java hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/HCatTypeCheckHive.java
new file mode 100644
index 0000000..19a74e9
--- /dev/null
+++ hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/HCatTypeCheckHive.java
@@ -0,0 +1,142 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hive.hcatalog.utils;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions;
+
+/**
+ * A hive udf to check types of the fields read from hcat. A sample hive query which can use this is:
+ *
+ * create temporary function typecheck as 'org.apache.hive.hcatalog.utils.HCatTypeCheckHive';
+ * select typecheck('map<string,string>+struct<num:int,str:string,dbl:double>+array<map<string,string>>+int',
+ * mymap, mytuple, bagofmap, rownum) from complex;
+ *
+ *
+ * The first argument to the UDF is a string representing the schema of the columns in the table.
+ * The columns in the tables are the remaining args to it.
+ * The schema specification consists of the types as given by "describe <table>"
+ * with each column's type separated from the next column's type by a '+'
+ *
+ * The UDF will throw an exception (and cause the query to fail) if it does not
+ * encounter the correct types.
+ *
+ * The output is a string representation of the data , type and hive category.
+ * It is not advisable to use this against large dataset since the output would also
+ * be large.
+ *
+ */
+public final class HCatTypeCheckHive extends GenericUDF {
+
+    ObjectInspector[] argOIs;
+
+    @Override
+    public Object evaluate(DeferredObject[] args) throws HiveException {
+        List<Object> row = new ArrayList<Object>();
+        Object schema = getJavaObject(args[0].get(), argOIs[0], new ArrayList<Category>());
+        if (schema == null) {
+            throw new HiveException("The schema string (first argument) must not be null");
+        }
+        String[] types = schema.toString().split("\\+");
+        for (int i = 0; i < types.length; i++) {
+            types[i] = types[i].toLowerCase();
+        }
+        // Every column needs a type, otherwise types[i - 1] below would overrun.
+        if (types.length < args.length - 1) {
+            throw new HiveException("Schema string describes " + types.length
+                + " column(s) but " + (args.length - 1) + " were passed");
+        }
+        for (int i = 1; i < args.length; i++) {
+            ObjectInspector oi = argOIs[i];
+            List<Category> categories = new ArrayList<Category>();
+            Object o = getJavaObject(args[i].get(), oi, categories);
+            try {
+                if (o != null) {
+                    Util.check(types[i - 1], o);
+                }
+            } catch (IOException e) {
+                throw new HiveException(e);
+            }
+            row.add(o == null ? "null" : o);
+            row.add(":" + (o == null ? "null" : o.getClass()) + ":" + categories);
+        }
+        return row.toString();
+    }
+
+    /**
+     * Converts a value to plain Java objects using its object inspector:
+     * lists and structs become Lists, maps become Map&lt;String, String&gt;,
+     * primitives become their Java primitive object.
+     *
+     * @param categories if non-null, the category of every inspector visited
+     *                   is appended to it (map keys and values are not recorded)
+     */
+    private Object getJavaObject(Object o, ObjectInspector oi, List<Category> categories) {
+        Category category = oi.getCategory();
+        if (categories != null) {
+            categories.add(category);
+        }
+        if (o == null) {
+            // A null list/map/struct has nothing to walk.
+            return null;
+        }
+        if (category == ObjectInspector.Category.LIST) {
+            ListObjectInspector loi = (ListObjectInspector) oi;
+            List<?> l = loi.getList(o);
+            List<Object> result = new ArrayList<Object>();
+            ObjectInspector elemOI = loi.getListElementObjectInspector();
+            for (Object lo : l) {
+                result.add(getJavaObject(lo, elemOI, categories));
+            }
+            return result;
+        } else if (category == ObjectInspector.Category.MAP) {
+            MapObjectInspector moi = (MapObjectInspector) oi;
+            Map<?, ?> m = moi.getMap(o);
+            Map<String, String> result = new HashMap<String, String>();
+            ObjectInspector koi = moi.getMapKeyObjectInspector();
+            ObjectInspector voi = moi.getMapValueObjectInspector();
+            for (Entry<?, ?> e : m.entrySet()) {
+                result.put((String) getJavaObject(e.getKey(), koi, null),
+                    (String) getJavaObject(e.getValue(), voi, null));
+            }
+            return result;
+
+        } else if (category == ObjectInspector.Category.STRUCT) {
+            StructObjectInspector soi = (StructObjectInspector) oi;
+            List<Object> s = soi.getStructFieldsDataAsList(o);
+            List<? extends StructField> sf = soi.getAllStructFieldRefs();
+            List<Object> result = new ArrayList<Object>();
+            for (int i = 0; i < s.size(); i++) {
+                result.add(getJavaObject(s.get(i), sf.get(i).getFieldObjectInspector(), categories));
+            }
+            return result;
+        } else if (category == ObjectInspector.Category.PRIMITIVE) {
+            return ((PrimitiveObjectInspector) oi).getPrimitiveJavaObject(o);
+        }
+        throw new RuntimeException("Unexpected error!");
+    }
+
+    @Override
+    public String getDisplayString(String[] arg0) {
+        StringBuilder sb = new StringBuilder("typecheck(");
+        if (arg0 != null) {
+            for (int i = 0; i < arg0.length; i++) {
+                if (i > 0) {
+                    sb.append(", ");
+                }
+                sb.append(arg0[i]);
+            }
+        }
+        return sb.append(')').toString();
+    }
+
+    @Override
+    public ObjectInspector initialize(ObjectInspector[] argOIs)
+        throws UDFArgumentException {
+        if (argOIs == null || argOIs.length < 1) {
+            throw new UDFArgumentException(
+                "typecheck expects the schema string followed by the columns to check");
+        }
+        this.argOIs = argOIs;
+        return ObjectInspectorFactory.getReflectionObjectInspector(String.class,
+            ObjectInspectorOptions.JAVA);
+    }
+
+}
diff --git
hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/ReadJson.java hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/ReadJson.java
new file mode 100644
index 0000000..059dcdd
--- /dev/null
+++ hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/ReadJson.java
@@ -0,0 +1,107 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hive.hcatalog.utils;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
+import org.apache.hadoop.util.GenericOptionsParser;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.hive.hcatalog.common.HCatConstants;
+import org.apache.hive.hcatalog.data.DefaultHCatRecord;
+import org.apache.hive.hcatalog.data.HCatRecord;
+import org.apache.hive.hcatalog.mapreduce.HCatInputFormat;
+import org.apache.hive.hcatalog.mapreduce.InputJobInfo;
+
+/**
+ * Map-only test job that reads a three-column HCatalog table (string, int,
+ * double) and writes every row unchanged to a text output directory.
+ *
+ * Arguments, after the generic Hadoop options: serveruri, table name,
+ * output dir. The server URI is read positionally but not used.
+ */
+public class ReadJson extends Configured implements Tool {
+
+    public static class Map
+        extends Mapper<WritableComparable, HCatRecord, IntWritable, HCatRecord> {
+
+        @Override
+        protected void map(WritableComparable key, HCatRecord value, Context context)
+            throws IOException, InterruptedException {
+            // The casts double as a type check on the values read; null passes.
+            String s = (String) value.get(0);
+            Integer i = (Integer) value.get(1);
+            Double d = (Double) value.get(2);
+
+            HCatRecord record = new DefaultHCatRecord(3);
+            record.set(0, s);
+            record.set(1, i);
+            record.set(2, d);
+
+            context.write(null, record);
+        }
+    }
+
+    public int run(String[] args) throws Exception {
+        Configuration conf = getConf();
+        args = new GenericOptionsParser(conf, args).getRemainingArgs();
+        if (args.length < 3) {
+            System.err.println("Usage: ReadJson <serveruri> <table name> <output dir>");
+            return 2;
+        }
+
+        // args[0] is the server URI; it is not needed to configure the job.
+        String tableName = args[1];
+        String outputDir = args[2];
+        String dbName = null;
+
+        String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL);
+        if (principalID != null) {
+            conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID);
+        }
+        Job job = new Job(conf, "ReadJson");
+        HCatInputFormat.setInput(job, InputJobInfo.create(dbName, tableName, null));
+
+        job.setInputFormatClass(HCatInputFormat.class);
+        job.setOutputFormatClass(TextOutputFormat.class);
+        job.setJarByClass(ReadJson.class);
+        job.setMapperClass(Map.class);
+        job.setOutputKeyClass(IntWritable.class);
+        job.setOutputValueClass(HCatRecord.class);
+        job.setNumReduceTasks(0);
+        FileOutputFormat.setOutputPath(job, new Path(outputDir));
+        return (job.waitForCompletion(true) ? 0 : 1);
+    }
+
+    public static void main(String[] args) throws Exception {
+        int exitCode = ToolRunner.run(new ReadJson(), args);
+        System.exit(exitCode);
+    }
+}
diff --git hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/ReadRC.java hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/ReadRC.java
new file mode 100644
index 0000000..415bce9
--- /dev/null
+++ hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/ReadRC.java
@@ -0,0 +1,113 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.
The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hive.hcatalog.utils;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
+import org.apache.hadoop.util.GenericOptionsParser;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.hive.hcatalog.common.HCatConstants;
+import org.apache.hive.hcatalog.data.DefaultHCatRecord;
+import org.apache.hive.hcatalog.data.HCatRecord;
+import org.apache.hive.hcatalog.mapreduce.HCatInputFormat;
+import org.apache.hive.hcatalog.mapreduce.InputJobInfo;
+
+/**
+ * This is a map reduce test for testing hcat which reads a table whose first
+ * three columns are name (string), age (int) and gpa (double). It replaces gpa
+ * with floor(gpa) + 0.1 and writes the rows out as text, to test that hcat
+ * hands the right data to the map reduce program. age and gpa must not be null
+ *
+ * Usage: hadoop jar <test jar> <serveruri> <tablename> <outputdir> <-libjars hive-hcat jar>
+ *        The serveruri argument is accepted but is not used by this class
+ *        The hcat jar location should be specified as file://<full path to jar>
+ */
+public class ReadRC extends Configured implements Tool {
+
+  public static class Map
+    extends Mapper<WritableComparable, HCatRecord, IntWritable, HCatRecord> {
+    // column values of the current row; overwritten on every map() call
+    String name;
+    int age;
+    double gpa;
+
+    @Override
+    protected void map(WritableComparable key, HCatRecord value,
+               org.apache.hadoop.mapreduce.Mapper<WritableComparable, HCatRecord,
+                 IntWritable, HCatRecord>.Context context)
+      throws IOException, InterruptedException {
+      name = (String) value.get(0);
+      age = (Integer) value.get(1);
+      gpa = (Double) value.get(2);
+      gpa = Math.floor(gpa) + 0.1;
+
+      HCatRecord record = new DefaultHCatRecord(3);
+      record.set(0, name);
+      record.set(1, age);
+      record.set(2, gpa);
+
+      context.write(null, record);
+
+    }
+  }
+  // args: <serveruri> <tablename> <outputdir>; returns 0 if the job succeeds, 1 otherwise
+  public int run(String[] args) throws Exception {
+    Configuration conf = getConf();
+    args = new GenericOptionsParser(conf, args).getRemainingArgs();
+
+    String serverUri = args[0];
+    String tableName = args[1];
+    String outputDir = args[2];
+    String dbName = null;
+    // copy the metastore principal from the JVM system properties, if it is set
+    String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL);
+    if (principalID != null)
+      conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID);
+    Job job = new Job(conf, "ReadRC");
+    HCatInputFormat.setInput(job, InputJobInfo.create(
+      dbName, tableName, null));
+    // map-only job: rows are read through HCatInputFormat and written as text
+
+    job.setInputFormatClass(HCatInputFormat.class);
+    job.setOutputFormatClass(TextOutputFormat.class);
+    job.setJarByClass(ReadRC.class);
+    job.setMapperClass(Map.class);
+    job.setOutputKeyClass(IntWritable.class);
+    job.setOutputValueClass(HCatRecord.class);
+    job.setNumReduceTasks(0);
+    FileOutputFormat.setOutputPath(job, new Path(outputDir));
+    return (job.waitForCompletion(true) ? 0 : 1);
+  }
+
+  public static void main(String[] args) throws Exception {
+    int exitCode = ToolRunner.run(new ReadRC(), args);
+    System.exit(exitCode);
+  }
+}
diff --git hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/ReadText.java hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/ReadText.java
new file mode 100644
index 0000000..497ffdb
--- /dev/null
+++ hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/ReadText.java
@@ -0,0 +1,124 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hive.hcatalog.utils;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
+import org.apache.hadoop.util.GenericOptionsParser;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.hive.hcatalog.common.HCatConstants;
+import org.apache.hive.hcatalog.data.DefaultHCatRecord;
+import org.apache.hive.hcatalog.data.HCatRecord;
+import org.apache.hive.hcatalog.mapreduce.HCatInputFormat;
+import org.apache.hive.hcatalog.mapreduce.InputJobInfo;
+
+/**
+ * This is a map reduce test for testing hcat which reads a table whose first
+ * seven columns are a byte, a short, an int, a long, a float, a double and a
+ * string, and writes every row out unchanged as text. This is to test that hcat
+ * hands the right data to the map reduce program. Numeric columns must not be null
+ *
+ * Usage: hadoop jar <test jar> <serveruri> <tablename> <outputdir> <-libjars hive-hcat jar>
+ *        The serveruri argument is accepted but is not used by this class
+ *        The hcat jar location should be specified as file://<full path to jar>
+ */
+public class ReadText extends Configured implements Tool {
+
+  public static class Map
+    extends Mapper<WritableComparable, HCatRecord, IntWritable, HCatRecord> {
+    // column values of the current row; overwritten on every map() call
+    byte t;
+    short si;
+    int i;
+    long b;
+    float f;
+    double d;
+    String s;
+
+    @Override
+    protected void map(WritableComparable key, HCatRecord value,
+               org.apache.hadoop.mapreduce.Mapper<WritableComparable, HCatRecord,
+                 IntWritable, HCatRecord>.Context context)
+      throws IOException, InterruptedException {
+      t = (Byte) value.get(0);
+      si = (Short) value.get(1);
+      i = (Integer) value.get(2);
+      b = (Long) value.get(3);
+      f = (Float) value.get(4);
+      d = (Double) value.get(5);
+      s = (String) value.get(6);
+
+      HCatRecord record = new DefaultHCatRecord(7);
+      record.set(0, t);
+      record.set(1, si);
+      record.set(2, i);
+      record.set(3, b);
+      record.set(4, f);
+      record.set(5, d);
+      record.set(6, s);
+
+      context.write(null, record);
+
+    }
+  }
+  // args: <serveruri> <tablename> <outputdir>; returns 0 if the job succeeds, 1 otherwise
+  public int run(String[] args) throws Exception {
+    Configuration conf = getConf();
+    args = new GenericOptionsParser(conf, args).getRemainingArgs();
+
+    String serverUri = args[0];
+    String tableName = args[1];
+    String outputDir = args[2];
+    String dbName = null;
+    // copy the metastore principal from the JVM system properties, if it is set
+    String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL);
+    if (principalID != null)
+      conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID);
+    Job job = new Job(conf, "ReadText");
+    HCatInputFormat.setInput(job, InputJobInfo.create(
+      dbName, tableName, null));
+    // map-only job: rows are read through HCatInputFormat and written as text
+
+    job.setInputFormatClass(HCatInputFormat.class);
+    job.setOutputFormatClass(TextOutputFormat.class);
+    job.setJarByClass(ReadText.class);
+    job.setMapperClass(Map.class);
+    job.setOutputKeyClass(IntWritable.class);
+    job.setOutputValueClass(HCatRecord.class);
+    job.setNumReduceTasks(0);
+    FileOutputFormat.setOutputPath(job, new Path(outputDir));
+    return (job.waitForCompletion(true) ? 0 : 1);
+  }
+
+  public static void main(String[] args) throws Exception {
+    int exitCode = ToolRunner.run(new ReadText(), args);
+    System.exit(exitCode);
+  }
+}
diff --git hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/ReadWrite.java hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/ReadWrite.java
new file mode 100644
index 0000000..0fefe33
--- /dev/null
+++ hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/ReadWrite.java
@@ -0,0 +1,112 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hive.hcatalog.utils;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.util.GenericOptionsParser;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.hive.hcatalog.common.HCatConstants;
+import org.apache.hive.hcatalog.data.DefaultHCatRecord;
+import org.apache.hive.hcatalog.data.HCatRecord;
+import org.apache.hive.hcatalog.data.schema.HCatSchema;
+import org.apache.hive.hcatalog.mapreduce.HCatInputFormat;
+import org.apache.hive.hcatalog.mapreduce.HCatOutputFormat;
+import org.apache.hive.hcatalog.mapreduce.InputJobInfo;
+import org.apache.hive.hcatalog.mapreduce.OutputJobInfo;
+
+/**
+ * This is a map reduce test for testing hcat which reads every row of an input
+ * table whose first three columns are name (string), age (int) and gpa
+ * (double) and stores it unchanged, using the input table's schema, into an
+ * output table. This is to test that hcat can read and write the same records
+ *
+ * Usage: hadoop jar <test jar> <serveruri> <input table> <output table>
+ * <-libjars hive-hcat jar> The serveruri argument is not used. The hcat jar
+ * location should be specified as file://<full path to jar>
+ */
+public class ReadWrite extends Configured implements Tool {
+
+  public static class Map extends
+    Mapper<WritableComparable, HCatRecord, Text, HCatRecord> {
+    // column values of the current row; age and gpa must not be null
+    String name;
+    int age;
+    double gpa;
+
+    @Override
+    protected void map(
+      WritableComparable key,
+      HCatRecord value,
+      org.apache.hadoop.mapreduce.Mapper<WritableComparable, HCatRecord, Text, HCatRecord>.Context context)
+      throws IOException, InterruptedException {
+      name = (String) value.get(0);
+      age = (Integer) value.get(1);
+      gpa = (Double) value.get(2);
+      context.write(new Text(name), value);
+
+    }
+  }
+  // args: <serveruri> <input table> <output table>; returns 0 if the job succeeds, 1 otherwise
+  public int run(String[] args) throws Exception {
+    Configuration conf = getConf();
+    args = new GenericOptionsParser(conf, args).getRemainingArgs();
+
+    String serverUri = args[0];
+    String inputTableName = args[1];
+    String outputTableName = args[2];
+    String dbName = null;
+    // copy the metastore principal from the JVM system properties, if it is set
+    String principalID = System
+      .getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL);
+    if (principalID != null)
+      conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID);
+    Job job = new Job(conf, "ReadWrite");
+    HCatInputFormat.setInput(job, InputJobInfo.create(dbName,
+      inputTableName, null));
+    // the output side (HCatOutputFormat) is configured further below
+
+    job.setInputFormatClass(HCatInputFormat.class);
+    job.setJarByClass(ReadWrite.class);
+    job.setMapperClass(Map.class);
+    job.setOutputKeyClass(Text.class);
+    job.setOutputValueClass(DefaultHCatRecord.class);
+    HCatOutputFormat.setOutput(job, OutputJobInfo.create(dbName,
+      outputTableName, null));
+    HCatSchema s = HCatInputFormat.getTableSchema(job);
+    System.err.println("INFO: output schema explicitly set for writing:"
+      + s);
+    HCatOutputFormat.setSchema(job, s);
+    job.setOutputFormatClass(HCatOutputFormat.class);
+    return (job.waitForCompletion(true) ? 0 : 1);
+  }
+
+  public static void main(String[] args) throws Exception {
+    int exitCode = ToolRunner.run(new ReadWrite(), args);
+    System.exit(exitCode);
+  }
+}
diff --git hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/SimpleRead.java hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/SimpleRead.java
new file mode 100644
index 0000000..ef2d6ed
--- /dev/null
+++ hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/SimpleRead.java
@@ -0,0 +1,108 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hive.hcatalog.utils;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
+import org.apache.hadoop.util.GenericOptionsParser;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.hive.hcatalog.common.HCatConstants;
+import org.apache.hive.hcatalog.data.HCatRecord;
+import org.apache.hive.hcatalog.mapreduce.HCatInputFormat;
+import org.apache.hive.hcatalog.mapreduce.InputJobInfo;
+
+/**
+ * This is a map reduce test for testing hcat which reads a table whose first
+ * three columns are name (string), age (int) and gpa (double) and writes out
+ * one (name, age) pair per row as text. This is to test that hcat hands the
+ * right data to the map reduce program. None of the three columns may be null
+ *
+ * Usage: hadoop jar <test jar> <serveruri> <tablename> <outputdir> <-libjars hive-hcat jar>
+ *        The serveruri argument is accepted but is not used by this class
+ *        The hcat jar location should be specified as file://<full path to jar>
+ */
+public class SimpleRead extends Configured implements Tool {
+
+  private static final String TABLE_NAME = "studenttab10k";
+  private static final String TAB = "\t";
+
+  public static class Map
+    extends Mapper<WritableComparable, HCatRecord, Text, IntWritable> {
+    // column values of the current row; overwritten on every map() call
+    String name;
+    int age;
+    double gpa;
+
+    @Override
+    protected void map(WritableComparable key, HCatRecord value,
+               org.apache.hadoop.mapreduce.Mapper<WritableComparable, HCatRecord,
+                 Text, IntWritable>.Context context)
+      throws IOException, InterruptedException {
+      name = (String) value.get(0);
+      age = (Integer) value.get(1);
+      gpa = (Double) value.get(2);
+      context.write(new Text(name), new IntWritable(age));
+
+    }
+  }
+  // args: <serveruri> <tablename> <outputdir>; returns 0 if the job succeeds, 1 otherwise
+  public int run(String[] args) throws Exception {
+    Configuration conf = getConf();
+    args = new GenericOptionsParser(conf, args).getRemainingArgs();
+
+    String serverUri = args[0];
+    String tableName = args[1];
+    String outputDir = args[2];
+    String dbName = null;
+    // copy the metastore principal from the JVM system properties, if it is set
+    String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL);
+    if (principalID != null)
+      conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID);
+    Job job = new Job(conf, "SimpleRead");
+    HCatInputFormat.setInput(job, InputJobInfo.create(
+      dbName, tableName, null));
+    // rows are read through HCatInputFormat and written as text
+
+    job.setInputFormatClass(HCatInputFormat.class);
+    job.setOutputFormatClass(TextOutputFormat.class);
+    job.setJarByClass(SimpleRead.class);
+    job.setMapperClass(Map.class);
+    job.setOutputKeyClass(Text.class);
+    job.setOutputValueClass(IntWritable.class);
+    FileOutputFormat.setOutputPath(job, new Path(outputDir));
+    return (job.waitForCompletion(true) ? 0 : 1);
+  }
+
+  public static void main(String[] args) throws Exception {
+    int exitCode = ToolRunner.run(new SimpleRead(), args);
+    System.exit(exitCode);
+  }
+}
diff --git hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/StoreComplex.java hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/StoreComplex.java
new file mode 100644
index 0000000..82749a8
--- /dev/null
+++ hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/StoreComplex.java
@@ -0,0 +1,136 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hive.hcatalog.utils;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Random;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.util.GenericOptionsParser;
+import org.apache.hive.hcatalog.common.HCatConstants;
+import org.apache.hive.hcatalog.data.DefaultHCatRecord;
+import org.apache.hive.hcatalog.data.HCatRecord;
+import org.apache.hive.hcatalog.data.schema.HCatSchema;
+import org.apache.hive.hcatalog.mapreduce.HCatInputFormat;
+import org.apache.hive.hcatalog.mapreduce.HCatOutputFormat;
+import org.apache.hive.hcatalog.mapreduce.InputJobInfo;
+import org.apache.hive.hcatalog.mapreduce.OutputJobInfo;
+
+/**
+ * This is a map reduce test for testing hcat which goes against the "complex"
+ * table and writes to "complex_nopart_empty_initially" table. It reads data from complex which
+ * is an unpartitioned table and stores the data as-is into complex_nopart_empty_initially table
+ * (which is also unpartitioned)
+ *
+ * Usage: hadoop jar testudf.jar storecomplex <serveruri> <-libjars hive-hcat jar>
+ *        The hcat jar location should be specified as file://<full path to jar>
+ */
+public class StoreComplex {
+
+  private static final String COMPLEX_TABLE_NAME = "complex";
+  private static final String COMPLEX_NOPART_EMPTY_INITIALLY_TABLE_NAME = "complex_nopart_empty_initially";
+
+
+  public static class ComplexMapper
+    extends Mapper<WritableComparable, HCatRecord, WritableComparable, HCatRecord> {
+
+    @Override
+    protected void map(WritableComparable key, HCatRecord value,
+               org.apache.hadoop.mapreduce.Mapper<WritableComparable, HCatRecord,
+                 WritableComparable, HCatRecord>.Context context)
+      throws IOException, InterruptedException {
+      // just write out the value as-is
+      context.write(new IntWritable(0), value);
+
+    }
+  }
+
+  // expects exactly one positional argument (serveruri); exits the JVM when done
+  public static void main(String[] args) throws Exception {
+    Configuration conf = new Configuration();
+    args = new GenericOptionsParser(conf, args).getRemainingArgs();
+    String[] otherArgs = new String[args.length];
+    int j = 0;
+    for (int i = 0; i < args.length; i++) {
+      if (args[i].equals("-libjars") && i + 1 < args.length) {
+        // generic options parser doesn't seem to work!
+        conf.set("tmpjars", args[i + 1]);
+        i = i + 1; // skip it , the for loop will skip its value
+      } else {
+        otherArgs[j++] = args[i];
+      }
+    }
+    if (j != 1) {
+      usage();
+    }
+    String serverUri = otherArgs[0];
+    String tableName = COMPLEX_TABLE_NAME;
+    String dbName = "default";
+    Map<String, String> outputPartitionKvps = new HashMap<String, String>();
+    String outputTableName = null;
+    outputTableName = COMPLEX_NOPART_EMPTY_INITIALLY_TABLE_NAME;
+    // test with null or empty randomly
+    if (new Random().nextInt(2) == 0) {
+      System.err.println("INFO: output partition keys set to null for writing");
+      outputPartitionKvps = null;
+    }
+    String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL);
+    if (principalID != null)
+      conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID);
+    Job job = new Job(conf, "storecomplex");
+    // initialize HCatInputFormat
+
+    HCatInputFormat.setInput(job, InputJobInfo.create(
+      dbName, tableName, null));
+    // initialize HCatOutputFormat
+    HCatOutputFormat.setOutput(job, OutputJobInfo.create(
+      dbName, outputTableName, outputPartitionKvps));
+
+    // write with the schema of the input table
+    HCatSchema s = HCatInputFormat.getTableSchema(job);
+    HCatOutputFormat.setSchema(job, s);
+    job.setInputFormatClass(HCatInputFormat.class);
+    job.setOutputFormatClass(HCatOutputFormat.class);
+    job.setJarByClass(StoreComplex.class);
+    job.setMapperClass(ComplexMapper.class);
+    job.setOutputKeyClass(IntWritable.class);
+    job.setOutputValueClass(DefaultHCatRecord.class);
+    System.exit(job.waitForCompletion(true) ? 0 : 1);
+  }
+
+
+  /**
+   * Prints the command line usage to stderr and exits with status 2.
+   */
+  private static void usage() {
+    System.err.println("Usage: hadoop jar testudf.jar storecomplex <serveruri> <-libjars hive-hcat jar>\n" +
+      "The hcat jar location should be specified as file://<full path to jar>\n");
+    System.exit(2);
+
+  }
+
+
+}
diff --git hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/StoreDemo.java hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/StoreDemo.java
new file mode 100644
index 0000000..fdf642d
--- /dev/null
+++ hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/StoreDemo.java
@@ -0,0 +1,153 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hive.hcatalog.utils;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.util.GenericOptionsParser;
+import org.apache.hive.hcatalog.common.HCatConstants;
+import org.apache.hive.hcatalog.data.DefaultHCatRecord;
+import org.apache.hive.hcatalog.data.HCatRecord;
+import org.apache.hive.hcatalog.data.schema.HCatSchema;
+import org.apache.hive.hcatalog.mapreduce.HCatInputFormat;
+import org.apache.hive.hcatalog.mapreduce.HCatOutputFormat;
+import org.apache.hive.hcatalog.mapreduce.InputJobInfo;
+import org.apache.hive.hcatalog.mapreduce.OutputJobInfo;
+
+/**
+ * This is a map reduce test for testing hcat which goes against the "demo"
+ * table and writes data to another table. It reads every row of demo, whose
+ * first column must be an int and whose second column must be a double, and
+ * adds 20 to each of those two fields; any remaining fields are left as they
+ * are. It stores the result into the datestamp='20100102' partition of the
+ * demo_partitioned table, using the schema of the demo table as the output
+ * schema. Both tables are looked up in the "default" database.
+ * Unlike StoreNumbers there is no "part" / "nopart" / "nopart_pig" argument:
+ * the input table, the output table and the output partition are fixed.
+ * The serveruri argument is accepted but is not used by this class
+ *
+ * Usage: hadoop jar <test jar> <serveruri> <-libjars hive-hcat jar>
+ *        Exactly one serveruri argument must be given; otherwise this usage
+ *        message is printed and the program exits with status 2.
+ *        Neither of the first two columns of the demo table may be null.
+ *        The hcat jar location should be specified as file://<full path to jar>
+ */
+public class StoreDemo {
+
+  private static final String NUMBERS_PARTITIONED_TABLE_NAME = "demo_partitioned";
+  private static final String NUMBERS_TABLE_NAME = "demo";
+
+  public static class SumMapper
+    extends Mapper<WritableComparable, HCatRecord, WritableComparable, HCatRecord> {
+
+    // first two column values of the current row; overwritten on every map() call
+    Integer intnum;
+
+    Double doublenum;
+
+    @Override
+    protected void map(WritableComparable key, HCatRecord value,
+               org.apache.hadoop.mapreduce.Mapper<WritableComparable, HCatRecord,
+                 WritableComparable, HCatRecord>.Context context)
+      throws IOException, InterruptedException {
+      intnum = ((Integer) value.get(0));
+      value.set(0, intnum + 20);
+      doublenum = ((Double) value.get(1));
+      value.set(1, (Double) (doublenum + 20));
+      context.write(new IntWritable(0), value);
+
+    }
+  }
+
+  // expects exactly one positional argument (serveruri); exits the JVM when done
+  public static void main(String[] args) throws Exception {
+    Configuration conf = new Configuration();
+    args = new GenericOptionsParser(conf, args).getRemainingArgs();
+    String[] otherArgs = new String[args.length];
+    int j = 0;
+    for (int i = 0; i < args.length; i++) {
+      if (args[i].equals("-libjars") && i + 1 < args.length) {
+        // generic options parser doesn't seem to work!
+        conf.set("tmpjars", args[i + 1]);
+        i = i + 1; // skip it , the for loop will skip its value
+      } else {
+        otherArgs[j++] = args[i];
+      }
+    }
+    if (j != 1) {
+      usage();
+    }
+    String serverUri = otherArgs[0];
+
+    String tableName = NUMBERS_TABLE_NAME;
+    String dbName = "default";
+    Map<String, String> outputPartitionKvps = new HashMap<String, String>();
+    String outputTableName = NUMBERS_PARTITIONED_TABLE_NAME;
+    outputPartitionKvps.put("datestamp", "20100102");
+
+    String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL);
+    if (principalID != null)
+      conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID);
+    Job job = new Job(conf, "storedemo");
+    // initialize HCatInputFormat
+    HCatInputFormat.setInput(job, InputJobInfo.create(
+      dbName, tableName, null));
+    // initialize HCatOutputFormat
+    HCatOutputFormat.setOutput(job, OutputJobInfo.create(
+      dbName, outputTableName, outputPartitionKvps));
+    // always write with the schema of the input table
+    HCatSchema s = HCatInputFormat.getTableSchema(job);
+    System.err.println("INFO: output schema explicitly set for writing:" + s);
+    HCatOutputFormat.setSchema(job, s);
+
+    job.setInputFormatClass(HCatInputFormat.class);
+    job.setOutputFormatClass(HCatOutputFormat.class);
+    job.setJarByClass(StoreDemo.class);
+    job.setMapperClass(SumMapper.class);
+    job.setOutputKeyClass(IntWritable.class);
+    job.setNumReduceTasks(0);
+    job.setOutputValueClass(DefaultHCatRecord.class);
+    System.exit(job.waitForCompletion(true) ? 0 : 1);
+  }
+
+
+  /**
+   * Prints the command line usage to stderr and exits with status 2.
+   */
+  private static void usage() {
+    System.err.println("Usage: hadoop jar <test jar> <serveruri> <-libjars hive-hcat jar>\n" +
+      "\tReads the \"" + NUMBERS_TABLE_NAME + "\" table, adds 20 to its first two columns and " +
+      "writes the rows\n\tto the datestamp = '20100102' partition of the \"" +
+      NUMBERS_PARTITIONED_TABLE_NAME + "\" table.\n\t" +
+      "The serveruri argument is required but is not used.\n\t" +
+      "The hcat jar location should be specified as file://<full path to jar>\n");
+    System.exit(2);
+
+  }
+
+
+}
diff --git hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/StoreNumbers.java hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/StoreNumbers.java
new file mode 100644
index 0000000..3be6e70
--- /dev/null
+++ hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/StoreNumbers.java
@@ -0,0 +1,233 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */ + +package org.apache.hive.hcatalog.utils; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Random; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.WritableComparable; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.util.GenericOptionsParser; +import org.apache.hive.hcatalog.common.HCatConstants; +import org.apache.hive.hcatalog.data.DefaultHCatRecord; +import org.apache.hive.hcatalog.data.HCatRecord; +import org.apache.hive.hcatalog.data.schema.HCatFieldSchema; +import org.apache.hive.hcatalog.data.schema.HCatSchema; +import org.apache.hive.hcatalog.mapreduce.HCatInputFormat; +import org.apache.hive.hcatalog.mapreduce.HCatOutputFormat; +import org.apache.hive.hcatalog.mapreduce.InputJobInfo; +import org.apache.hive.hcatalog.mapreduce.OutputJobInfo; + +/** + * This is a map reduce test for testing hcat which goes against the "numbers" + * table and writes data to another table. It reads data from numbers which + * is an unpartitioned table and adds 10 to each field. It stores the result into + * the datestamp='20100101' partition of the numbers_part_empty_initially table if the second + * command line arg is "part". If the second cmdline arg is "nopart" then the + * result is stored into the 'numbers_nopart_empty_initially' (unpartitioned) table. + * If the second cmdline arg is "nopart_pig", then the result is stored into the + * 'numbers_nopart_pig_empty_initially' (unpartitioned) table with the tinyint + * and smallint columns in "numbers" being stored as "int" (since pig cannot handle + * tinyint and smallint) + * + * Usage: hadoop jar storenumbers <-libjars hive-hcat jar> + If the second argument is "part" data is written to datestamp = '2010101' partition of the numbers_part_empty_initially table. 
+ If the second argument is "nopart", data is written to the unpartitioned numbers_nopart_empty_initially table. + If the second argument is "nopart_pig", data is written to the unpartitioned numbers_nopart_pig_empty_initially table. + The hcat jar location should be specified as file:// + */ +public class StoreNumbers { + + private static final String NUMBERS_PARTITIONED_TABLE_NAME = "numbers_part_empty_initially"; + private static final String NUMBERS_TABLE_NAME = "numbers"; + private static final String NUMBERS_NON_PARTITIONED_TABLE_NAME = "numbers_nopart_empty_initially"; + private static final String NUMBERS_NON_PARTITIONED_PIG_TABLE_NAME = "numbers_nopart_pig_empty_initially"; + private static final String IS_PIG_NON_PART_TABLE = "is.pig.non.part.table"; + + public static class SumMapper + extends Mapper { + + Integer intnum1000; + // though id is given as a Short by hcat, the map will emit it as an + // IntWritable so we can just sum in the reduce + Short id; + + // though intnum5 is handed as a Byte by hcat, the map() will emit it as + // an IntWritable so we can just sum in the reduce + Byte intnum5; + Integer intnum100; + Integer intnum; + Long longnum; + Float floatnum; + Double doublenum; + + @Override + protected void map(WritableComparable key, HCatRecord value, + org.apache.hadoop.mapreduce.Mapper.Context context) + throws IOException, InterruptedException { + boolean isnoPartPig = context.getConfiguration().getBoolean(IS_PIG_NON_PART_TABLE, false); + intnum1000 = ((Integer) value.get(0)); + id = ((Short) value.get(1)); + intnum5 = (((Byte) value.get(2))); + intnum100 = (((Integer) value.get(3))); + intnum = ((Integer) value.get(4)); + longnum = ((Long) value.get(5)); + floatnum = ((Float) value.get(6)); + doublenum = ((Double) value.get(7)); + HCatRecord output = new DefaultHCatRecord(8); + output.set(0, intnum1000 + 10); + if (isnoPartPig) { + output.set(1, ((int) (id + 10))); + } else { + output.set(1, ((short) (id + 10))); + } + if (isnoPartPig) 
{ + output.set(2, (int) (intnum5 + 10)); + } else { + output.set(2, (byte) (intnum5 + 10)); + } + + output.set(3, intnum100 + 10); + output.set(4, intnum + 10); + output.set(5, (long) (longnum + 10)); + output.set(6, (float) (floatnum + 10)); + output.set(7, (double) (doublenum + 10)); + for (int i = 0; i < 8; i++) { + System.err.println("XXX: class:" + output.get(i).getClass()); + } + context.write(new IntWritable(0), output); + + } + } + + + public static void main(String[] args) throws Exception { + Configuration conf = new Configuration(); + args = new GenericOptionsParser(conf, args).getRemainingArgs(); + String[] otherArgs = new String[2]; + int j = 0; + for (int i = 0; i < args.length; i++) { + if (args[i].equals("-libjars")) { + // generic options parser doesn't seem to work! + conf.set("tmpjars", args[i + 1]); + i = i + 1; // skip it , the for loop will skip its value + } else { + otherArgs[j++] = args[i]; + } + } + if (otherArgs.length != 2) { + usage(); + } + String serverUri = otherArgs[0]; + if (otherArgs[1] == null || ( + !otherArgs[1].equalsIgnoreCase("part") && !otherArgs[1].equalsIgnoreCase("nopart")) + && !otherArgs[1].equalsIgnoreCase("nopart_pig")) { + usage(); + } + boolean writeToPartitionedTable = (otherArgs[1].equalsIgnoreCase("part")); + boolean writeToNonPartPigTable = (otherArgs[1].equalsIgnoreCase("nopart_pig")); + String tableName = NUMBERS_TABLE_NAME; + String dbName = "default"; + Map outputPartitionKvps = new HashMap(); + String outputTableName = null; + conf.set(IS_PIG_NON_PART_TABLE, "false"); + if (writeToPartitionedTable) { + outputTableName = NUMBERS_PARTITIONED_TABLE_NAME; + outputPartitionKvps.put("datestamp", "20100101"); + } else { + if (writeToNonPartPigTable) { + conf.set(IS_PIG_NON_PART_TABLE, "true"); + outputTableName = NUMBERS_NON_PARTITIONED_PIG_TABLE_NAME; + } else { + outputTableName = NUMBERS_NON_PARTITIONED_TABLE_NAME; + } + // test with null or empty randomly + if (new Random().nextInt(2) == 0) { + 
outputPartitionKvps = null; + } + } + + String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL); + if (principalID != null) + conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID); + Job job = new Job(conf, "storenumbers"); + + // initialize HCatInputFormat + HCatInputFormat.setInput(job, InputJobInfo.create( + dbName, tableName, null)); + // initialize HCatOutputFormat + HCatOutputFormat.setOutput(job, OutputJobInfo.create( + dbName, outputTableName, outputPartitionKvps)); + // test with and without specifying schema randomly + HCatSchema s = HCatInputFormat.getTableSchema(job); + if (writeToNonPartPigTable) { + List newHfsList = new ArrayList(); + // change smallint and tinyint to int + for (HCatFieldSchema hfs : s.getFields()) { + if (hfs.getTypeString().equals("smallint")) { + newHfsList.add(new HCatFieldSchema(hfs.getName(), + HCatFieldSchema.Type.INT, hfs.getComment())); + } else if (hfs.getTypeString().equals("tinyint")) { + newHfsList.add(new HCatFieldSchema(hfs.getName(), + HCatFieldSchema.Type.INT, hfs.getComment())); + } else { + newHfsList.add(hfs); + } + } + s = new HCatSchema(newHfsList); + } + HCatOutputFormat.setSchema(job, s); + + + job.setInputFormatClass(HCatInputFormat.class); + job.setOutputFormatClass(HCatOutputFormat.class); + job.setJarByClass(StoreNumbers.class); + job.setMapperClass(SumMapper.class); + job.setOutputKeyClass(IntWritable.class); + job.setNumReduceTasks(0); + job.setOutputValueClass(DefaultHCatRecord.class); + System.exit(job.waitForCompletion(true) ? 
0 : 1); + } + + + /** + * + */ + private static void usage() { + System.err.println("Usage: hadoop jar storenumbers <-libjars hive-hcat jar>\n" + + "\tIf the second argument is \"part\" data is written to datestamp = '2010101' partition of " + + "the numbers_part_empty_initially table.\n\tIf the second argument is \"nopart\", data is written to " + + "the unpartitioned numbers_nopart_empty_initially table.\n\tIf the second argument is \"nopart_pig\", " + + "data is written to the unpartitioned numbers_nopart_pig_empty_initially table.\nt" + + "The hcat jar location should be specified as file://\n"); + System.exit(2); + + } + + +} diff --git hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/SumNumbers.java hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/SumNumbers.java new file mode 100644 index 0000000..cfb138d --- /dev/null +++ hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/SumNumbers.java @@ -0,0 +1,258 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hive.hcatalog.utils; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.DoubleWritable; +import org.apache.hadoop.io.FloatWritable; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.io.WritableComparable; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mapreduce.Reducer; +import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; +import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; +import org.apache.hadoop.util.GenericOptionsParser; +import org.apache.hive.hcatalog.common.HCatConstants; +import org.apache.hive.hcatalog.data.HCatRecord; +import org.apache.hive.hcatalog.mapreduce.HCatInputFormat; +import org.apache.hive.hcatalog.mapreduce.InputJobInfo; + +/** + * This is a map reduce test for testing hcat which goes against the "numbers" + * table. It performs a group by on the first column and a SUM operation on the + * other columns. 
This is to simulate a typical operation in a map reduce program + * to test that hcat hands the right data to the map reduce program + * + * Usage: hadoop jar sumnumbers <-libjars hive-hcat jar> + The argument controls the output delimiter + The hcat jar location should be specified as file:// + */ +public class SumNumbers { + + private static final String NUMBERS_TABLE_NAME = "numbers"; + private static final String TAB = "\t"; + + public static class SumMapper + extends Mapper { + + IntWritable intnum1000; + // though id is given as a Short by hcat, the map will emit it as an + // IntWritable so we can just sum in the reduce + IntWritable id; + + // though intnum5 is handed as a Byte by hcat, the map() will emit it as + // an IntWritable so we can just sum in the reduce + IntWritable intnum5; + IntWritable intnum100; + IntWritable intnum; + LongWritable longnum; + FloatWritable floatnum; + DoubleWritable doublenum; + + @Override + protected void map(WritableComparable key, HCatRecord value, + org.apache.hadoop.mapreduce.Mapper.Context context) + throws IOException, InterruptedException { + intnum1000 = new IntWritable((Integer) value.get(0)); + id = new IntWritable((Short) value.get(1)); + intnum5 = new IntWritable(((Byte) value.get(2))); + intnum100 = new IntWritable(((Integer) value.get(3))); + intnum = new IntWritable((Integer) value.get(4)); + longnum = new LongWritable((Long) value.get(5)); + floatnum = new FloatWritable((Float) value.get(6)); + doublenum = new DoubleWritable((Double) value.get(7)); + SumNumbers.ArrayWritable outputValue = new SumNumbers.ArrayWritable(id, + intnum5, intnum100, intnum, longnum, floatnum, doublenum); + context.write(intnum1000, outputValue); + + } + } + + public static class SumReducer extends Reducer { + + + LongWritable dummyLong = null; + + @Override + protected void reduce(IntWritable key, java.lang.Iterable + values, org.apache.hadoop.mapreduce.Reducer.Context context) + throws IOException, InterruptedException { + String 
output = key.toString() + TAB; + Long sumid = 0l; + Long sumintnum5 = 0l; + Long sumintnum100 = 0l; + Long sumintnum = 0l; + Long sumlongnum = 0l; + Float sumfloatnum = 0.0f; + Double sumdoublenum = 0.0; + for (ArrayWritable value : values) { + sumid += value.id.get(); + sumintnum5 += value.intnum5.get(); + sumintnum100 += value.intnum100.get(); + sumintnum += value.intnum.get(); + sumlongnum += value.longnum.get(); + sumfloatnum += value.floatnum.get(); + sumdoublenum += value.doublenum.get(); + } + output += sumid + TAB; + output += sumintnum5 + TAB; + output += sumintnum100 + TAB; + output += sumintnum + TAB; + output += sumlongnum + TAB; + output += sumfloatnum + TAB; + output += sumdoublenum + TAB; + context.write(dummyLong, new Text(output)); + } + } + + public static void main(String[] args) throws Exception { + Configuration conf = new Configuration(); + args = new GenericOptionsParser(conf, args).getRemainingArgs(); + String[] otherArgs = new String[4]; + int j = 0; + for (int i = 0; i < args.length; i++) { + if (args[i].equals("-libjars")) { + // generic options parser doesn't seem to work! 
+ conf.set("tmpjars", args[i + 1]); + i = i + 1; // skip it , the for loop will skip its value + } else { + otherArgs[j++] = args[i]; + } + } + if (otherArgs.length != 4) { + System.err.println("Usage: hadoop jar sumnumbers <-libjars hive-hcat jar>\n" + + "The argument controls the output delimiter.\n" + + "The hcat jar location should be specified as file://\n"); + System.exit(2); + } + String serverUri = otherArgs[0]; + String tableName = NUMBERS_TABLE_NAME; + String outputDir = otherArgs[1]; + String dbName = "default"; + + String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL); + if (principalID != null) + conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID); + Job job = new Job(conf, "sumnumbers"); + HCatInputFormat.setInput(job, InputJobInfo.create( + dbName, tableName, null)); + // initialize HCatOutputFormat + + job.setInputFormatClass(HCatInputFormat.class); + job.setOutputFormatClass(TextOutputFormat.class); + job.setJarByClass(SumNumbers.class); + job.setMapperClass(SumMapper.class); + job.setReducerClass(SumReducer.class); + job.setMapOutputKeyClass(IntWritable.class); + job.setMapOutputValueClass(ArrayWritable.class); + job.setOutputKeyClass(LongWritable.class); + job.setOutputValueClass(Text.class); + FileOutputFormat.setOutputPath(job, new Path(outputDir)); + System.exit(job.waitForCompletion(true) ? 
0 : 1); + } + + public static class ArrayWritable implements Writable { + + // though id is given as a Short by hcat, the map will emit it as an + // IntWritable so we can just sum in the reduce + IntWritable id; + + // though intnum5 is handed as a Byte by hcat, the map() will emit it as + // an IntWritable so we can just sum in the reduce + IntWritable intnum5; + + IntWritable intnum100; + IntWritable intnum; + LongWritable longnum; + FloatWritable floatnum; + DoubleWritable doublenum; + + /** + * + */ + public ArrayWritable() { + id = new IntWritable(); + intnum5 = new IntWritable(); + intnum100 = new IntWritable(); + intnum = new IntWritable(); + longnum = new LongWritable(); + floatnum = new FloatWritable(); + doublenum = new DoubleWritable(); + } + + + /** + * @param id + * @param intnum5 + * @param intnum100 + * @param intnum + * @param longnum + * @param floatnum + * @param doublenum + */ + public ArrayWritable(IntWritable id, IntWritable intnum5, + IntWritable intnum100, IntWritable intnum, LongWritable longnum, + FloatWritable floatnum, DoubleWritable doublenum) { + this.id = id; + this.intnum5 = intnum5; + this.intnum100 = intnum100; + this.intnum = intnum; + this.longnum = longnum; + this.floatnum = floatnum; + this.doublenum = doublenum; + } + + + @Override + public void readFields(DataInput in) throws IOException { + id.readFields(in); + intnum5.readFields(in); + intnum100.readFields(in); + intnum.readFields(in); + longnum.readFields(in); + floatnum.readFields(in); + doublenum.readFields(in); + } + + @Override + public void write(DataOutput out) throws IOException { + id.write(out); + intnum5.write(out); + intnum100.write(out); + intnum.write(out); + longnum.write(out); + floatnum.write(out); + doublenum.write(out); + + } + + } +} diff --git hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/TypeDataCheck.java hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/TypeDataCheck.java new file mode 100644 index 
0000000..27644f7 --- /dev/null +++ hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/TypeDataCheck.java @@ -0,0 +1,183 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.utils; + +import java.io.IOException; +import java.util.Arrays; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.WritableComparable; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; +import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; +import org.apache.hadoop.util.GenericOptionsParser; +import org.apache.hadoop.util.Tool; +import org.apache.hadoop.util.ToolRunner; +import org.apache.hive.hcatalog.common.HCatConstants; +import org.apache.hive.hcatalog.data.HCatRecord; +import org.apache.hive.hcatalog.data.schema.HCatSchema; +import org.apache.hive.hcatalog.mapreduce.HCatInputFormat; +import org.apache.hive.hcatalog.mapreduce.InputJobInfo; + +/** + * This is a map reduce test for testing hcat that checks that the columns + * handed by hcat have the right type and right values. 
It achieves the first + * objective by checking the type of the Objects representing the columns against + * the schema provided as a cmdline arg. It achieves the second objective by + * writing the data as Text to be compared against golden results. + * + * The schema specification consists of the types as given by "describe
" + * with each column's type separated from the next column's type by a '+' + * + * Can be used against "numbers" and "complex" tables. + * + * Usage: hadoop jar testudf.jar typedatacheck + * <-libjars hive-hcat jar> + The argument controls the output delimiter. + The hcat jar location should be specified as file:// + */ +public class TypeDataCheck implements Tool { + + static String SCHEMA_KEY = "schema"; + static String DELIM = "delim"; + private static Configuration conf = new Configuration(); + + public static class TypeDataCheckMapper + extends Mapper { + + Long dummykey = null; + String[] types; + String delim = "\u0001"; + + @Override + protected void setup(org.apache.hadoop.mapreduce.Mapper.Context context) + throws IOException, InterruptedException { + String typesStr = context.getConfiguration().get(SCHEMA_KEY); + delim = context.getConfiguration().get(DELIM); + if (delim.equals("tab")) { + delim = "\t"; + } else if (delim.equals("ctrla")) { + delim = "\u0001"; + } + types = typesStr.split("\\+"); + for (int i = 0; i < types.length; i++) { + types[i] = types[i].toLowerCase(); + } + + + } + + String check(HCatRecord r) throws IOException { + String s = ""; + for (int i = 0; i < r.size(); i++) { + s += Util.check(types[i], r.get(i)); + if (i != r.size() - 1) { + s += delim; + } + } + return s; + } + + @Override + protected void map(WritableComparable key, HCatRecord value, + org.apache.hadoop.mapreduce.Mapper.Context context) + throws IOException, InterruptedException { + context.write(dummykey, new Text(check(value))); + } + } + + public static void main(String[] args) throws Exception { + TypeDataCheck self = new TypeDataCheck(); + System.exit(ToolRunner.run(conf, self, args)); + } + + public int run(String[] args) { + try { + args = new GenericOptionsParser(conf, args).getRemainingArgs(); + String[] otherArgs = new String[5]; + int j = 0; + for (int i = 0; i < args.length; i++) { + if (args[i].equals("-libjars")) { + conf.set("tmpjars", args[i + 1]); + 
i = i + 1; // skip it , the for loop will skip its value + } else { + otherArgs[j++] = args[i]; + } + } + if (otherArgs.length != 5) { + System.err.println("Other args:" + Arrays.asList(otherArgs)); + System.err.println("Usage: hadoop jar testudf.jar typedatacheck " + + " " + + " <-libjars hive-hcat jar>\n" + + "The argument controls the output delimiter.\n" + + "The hcat jar location should be specified as file://\n"); + System.err.println(" The argument controls the output delimiter."); + System.exit(2); + } + String serverUri = otherArgs[0]; + String tableName = otherArgs[1]; + String schemaStr = otherArgs[2]; + String outputDir = otherArgs[3]; + String outputdelim = otherArgs[4]; + if (!outputdelim.equals("tab") && !outputdelim.equals("ctrla")) { + System.err.println("ERROR: Specify 'tab' or 'ctrla' for output delimiter"); + } + String dbName = "default"; + + String principalID = System.getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL); + if (principalID != null) { + conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID); + } + Job job = new Job(conf, "typedatacheck"); + // initialize HCatInputFormat + HCatInputFormat.setInput(job, InputJobInfo.create( + dbName, tableName, null)); + HCatSchema s = HCatInputFormat.getTableSchema(job); + job.getConfiguration().set(SCHEMA_KEY, schemaStr); + job.getConfiguration().set(DELIM, outputdelim); + job.setInputFormatClass(HCatInputFormat.class); + job.setOutputFormatClass(TextOutputFormat.class); + job.setJarByClass(TypeDataCheck.class); + job.setMapperClass(TypeDataCheckMapper.class); + job.setNumReduceTasks(0); + job.setOutputKeyClass(Long.class); + job.setOutputValueClass(Text.class); + FileOutputFormat.setOutputPath(job, new Path(outputDir)); + System.exit(job.waitForCompletion(true) ? 
0 : 1); + return 0; + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Override + public Configuration getConf() { + return conf; + } + + @Override + public void setConf(Configuration conf) { + TypeDataCheck.conf = conf; + } + +} diff --git hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/Util.java hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/Util.java new file mode 100644 index 0000000..97104b8 --- /dev/null +++ hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/Util.java @@ -0,0 +1,106 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hive.hcatalog.utils; + +import java.io.IOException; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; + +public class Util { + + static Map> typeMap = new HashMap>(); + + static { + typeMap.put("tinyint", Byte.class); + typeMap.put("smallint", Short.class); + typeMap.put("int", Integer.class); + typeMap.put("bigint", Long.class); + typeMap.put("float", Float.class); + typeMap.put("double", Double.class); + typeMap.put("string", String.class); + typeMap.put("boolean", Boolean.class); + typeMap.put("struct", List.class); + typeMap.put("map", Map.class); + typeMap.put("array>", List.class); + } + + public static void die(String expectedType, Object o) throws IOException { + throw new IOException("Expected " + expectedType + ", got " + + o.getClass().getName()); + } + + + public static String check(String type, Object o) throws IOException { + if (o == null) { + return "null"; + } + if (check(typeMap.get(type), o)) { + if (type.equals("map")) { + Map m = (Map) o; + check(m); + } else if (type.equals("array>")) { + List> listOfMaps = (List>) o; + for (Map m : listOfMaps) { + check(m); + } + } else if (type.equals("struct")) { + List l = (List) o; + if (!check(Integer.class, l.get(0)) || + !check(String.class, l.get(1)) || + !check(Double.class, l.get(2))) { + die("struct", l); + } + } + } else { + die(typeMap.get(type).getName(), o); + } + return o.toString(); + } + + /** + * @param m + * @throws IOException + */ + public static void check(Map m) throws IOException { + if (m == null) { + return; + } + for (Entry e : m.entrySet()) { + // just access key and value to ensure they are correct + if (!check(String.class, e.getKey())) { + die("String", e.getKey()); + } + if (!check(String.class, e.getValue())) { + die("String", e.getValue()); + } + } + + } + + public static boolean check(Class expected, Object actual) { + if (actual == null) { + return true; + } + return 
expected.isAssignableFrom(actual.getClass()); + } + +} diff --git hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/WriteJson.java hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/WriteJson.java new file mode 100644 index 0000000..b60a511 --- /dev/null +++ hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/WriteJson.java @@ -0,0 +1,118 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hive.hcatalog.utils; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.io.WritableComparable; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.util.GenericOptionsParser; +import org.apache.hadoop.util.Tool; +import org.apache.hadoop.util.ToolRunner; +import org.apache.hive.hcatalog.common.HCatConstants; +import org.apache.hive.hcatalog.data.DefaultHCatRecord; +import org.apache.hive.hcatalog.data.HCatRecord; +import org.apache.hive.hcatalog.data.schema.HCatSchema; +import org.apache.hive.hcatalog.mapreduce.HCatInputFormat; +import org.apache.hive.hcatalog.mapreduce.HCatOutputFormat; +import org.apache.hive.hcatalog.mapreduce.InputJobInfo; +import org.apache.hive.hcatalog.mapreduce.OutputJobInfo; + +/** + * This is a map reduce test for testing hcat which goes against the "numbers" + * table. It performs a group by on the first column and a SUM operation on the + * other columns. This is to simulate a typical operation in a map reduce + * program to test that hcat hands the right data to the map reduce program + * + * Usage: hadoop jar sumnumbers <-libjars hive-hcat + * jar> The argument controls the output delimiter The hcat jar + * location should be specified as file:// + */ +public class WriteJson extends Configured implements Tool { + + public static class Map extends + Mapper { + + String s; + Integer i; + Double d; + + @Override + protected void map( + WritableComparable key, + HCatRecord value, + org.apache.hadoop.mapreduce.Mapper.Context context) + throws IOException, InterruptedException { + s = value.get(0) == null ? null : (String) value.get(0); + i = value.get(1) == null ? null : (Integer) value.get(1); + d = value.get(2) == null ? 
null : (Double) value.get(2); + + HCatRecord record = new DefaultHCatRecord(5); + record.set(0, s); + record.set(1, i); + record.set(2, d); + + context.write(null, record); + + } + } + + public int run(String[] args) throws Exception { + Configuration conf = getConf(); + args = new GenericOptionsParser(conf, args).getRemainingArgs(); + + String serverUri = args[0]; + String inputTableName = args[1]; + String outputTableName = args[2]; + String dbName = null; + + String principalID = System + .getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL); + if (principalID != null) + conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID); + Job job = new Job(conf, "WriteJson"); + HCatInputFormat.setInput(job, InputJobInfo.create(dbName, + inputTableName, null)); + // initialize HCatOutputFormat + + job.setInputFormatClass(HCatInputFormat.class); + job.setJarByClass(WriteJson.class); + job.setMapperClass(Map.class); + job.setOutputKeyClass(WritableComparable.class); + job.setOutputValueClass(DefaultHCatRecord.class); + job.setNumReduceTasks(0); + HCatOutputFormat.setOutput(job, OutputJobInfo.create(dbName, + outputTableName, null)); + HCatSchema s = HCatInputFormat.getTableSchema(job); + System.err.println("INFO: output schema explicitly set for writing:" + + s); + HCatOutputFormat.setSchema(job, s); + job.setOutputFormatClass(HCatOutputFormat.class); + return (job.waitForCompletion(true) ? 
0 : 1); + } + + public static void main(String[] args) throws Exception { + int exitCode = ToolRunner.run(new WriteJson(), args); + System.exit(exitCode); + } +} diff --git hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/WriteRC.java hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/WriteRC.java new file mode 100644 index 0000000..1c72872 --- /dev/null +++ hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/WriteRC.java @@ -0,0 +1,120 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hive.hcatalog.utils; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.io.WritableComparable; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.util.GenericOptionsParser; +import org.apache.hadoop.util.Tool; +import org.apache.hadoop.util.ToolRunner; +import org.apache.hive.hcatalog.common.HCatConstants; +import org.apache.hive.hcatalog.data.DefaultHCatRecord; +import org.apache.hive.hcatalog.data.HCatRecord; +import org.apache.hive.hcatalog.data.schema.HCatSchema; +import org.apache.hive.hcatalog.mapreduce.HCatInputFormat; +import org.apache.hive.hcatalog.mapreduce.HCatOutputFormat; +import org.apache.hive.hcatalog.mapreduce.InputJobInfo; +import org.apache.hive.hcatalog.mapreduce.OutputJobInfo; + +/** + * This is a map reduce test for testing hcat which goes against the "numbers" + * table. It performs a group by on the first column and a SUM operation on the + * other columns. This is to simulate a typical operation in a map reduce + * program to test that hcat hands the right data to the map reduce program + * + * Usage: hadoop jar sumnumbers <-libjars hive-hcat + * jar> The argument controls the output delimiter The hcat jar + * location should be specified as file:// + */ +public class WriteRC extends Configured implements Tool { + + public static class Map extends + Mapper { + + String name; + Integer age; + Double gpa; + + @Override + protected void map( + WritableComparable key, + HCatRecord value, + org.apache.hadoop.mapreduce.Mapper.Context context) + throws IOException, InterruptedException { + name = value.get(0) == null ? null : (String) value.get(0); + age = value.get(1) == null ? null : (Integer) value.get(1); + gpa = value.get(2) == null ? 
null : (Double) value.get(2); + + if (gpa != null) gpa = Math.floor(gpa) + 0.1; + + HCatRecord record = new DefaultHCatRecord(5); + record.set(0, name); + record.set(1, age); + record.set(2, gpa); + + context.write(null, record); + + } + } + + public int run(String[] args) throws Exception { + Configuration conf = getConf(); + args = new GenericOptionsParser(conf, args).getRemainingArgs(); + + String serverUri = args[0]; + String inputTableName = args[1]; + String outputTableName = args[2]; + String dbName = null; + + String principalID = System + .getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL); + if (principalID != null) + conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID); + Job job = new Job(conf, "WriteRC"); + HCatInputFormat.setInput(job, InputJobInfo.create(dbName, + inputTableName, null)); + // initialize HCatOutputFormat + + job.setInputFormatClass(HCatInputFormat.class); + job.setJarByClass(WriteRC.class); + job.setMapperClass(Map.class); + job.setOutputKeyClass(WritableComparable.class); + job.setOutputValueClass(DefaultHCatRecord.class); + job.setNumReduceTasks(0); + HCatOutputFormat.setOutput(job, OutputJobInfo.create(dbName, + outputTableName, null)); + HCatSchema s = HCatInputFormat.getTableSchema(job); + System.err.println("INFO: output schema explicitly set for writing:" + + s); + HCatOutputFormat.setSchema(job, s); + job.setOutputFormatClass(HCatOutputFormat.class); + return (job.waitForCompletion(true) ? 
0 : 1); + } + + public static void main(String[] args) throws Exception { + int exitCode = ToolRunner.run(new WriteRC(), args); + System.exit(exitCode); + } +} diff --git hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/WriteText.java hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/WriteText.java new file mode 100644 index 0000000..43767a2 --- /dev/null +++ hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/WriteText.java @@ -0,0 +1,130 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hive.hcatalog.utils; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.io.WritableComparable; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.util.GenericOptionsParser; +import org.apache.hadoop.util.Tool; +import org.apache.hadoop.util.ToolRunner; +import org.apache.hive.hcatalog.common.HCatConstants; +import org.apache.hive.hcatalog.data.DefaultHCatRecord; +import org.apache.hive.hcatalog.data.HCatRecord; +import org.apache.hive.hcatalog.data.schema.HCatSchema; +import org.apache.hive.hcatalog.mapreduce.HCatInputFormat; +import org.apache.hive.hcatalog.mapreduce.HCatOutputFormat; +import org.apache.hive.hcatalog.mapreduce.InputJobInfo; +import org.apache.hive.hcatalog.mapreduce.OutputJobInfo; + +/** + * This is a map reduce test for testing hcat which goes against the "numbers" + * table. It performs a group by on the first column and a SUM operation on the + * other columns. 
This is to simulate a typical operation in a map reduce + * program to test that hcat hands the right data to the map reduce program + * + * Usage: hadoop jar sumnumbers <-libjars hive-hcat + * jar> The argument controls the output delimiter The hcat jar + * location should be specified as file:// + */ +public class WriteText extends Configured implements Tool { + + public static class Map extends + Mapper { + + byte t; + short si; + int i; + long b; + float f; + double d; + String s; + + @Override + protected void map( + WritableComparable key, + HCatRecord value, + org.apache.hadoop.mapreduce.Mapper.Context context) + throws IOException, InterruptedException { + t = (Byte) value.get(0); + si = (Short) value.get(1); + i = (Integer) value.get(2); + b = (Long) value.get(3); + f = (Float) value.get(4); + d = (Double) value.get(5); + s = (String) value.get(6); + + HCatRecord record = new DefaultHCatRecord(7); + record.set(0, t); + record.set(1, si); + record.set(2, i); + record.set(3, b); + record.set(4, f); + record.set(5, d); + record.set(6, s); + + context.write(null, record); + + } + } + + public int run(String[] args) throws Exception { + Configuration conf = getConf(); + args = new GenericOptionsParser(conf, args).getRemainingArgs(); + + String serverUri = args[0]; + String inputTableName = args[1]; + String outputTableName = args[2]; + String dbName = null; + + String principalID = System + .getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL); + if (principalID != null) + conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID); + Job job = new Job(conf, "WriteText"); + HCatInputFormat.setInput(job, InputJobInfo.create(dbName, + inputTableName, null)); + // initialize HCatOutputFormat + + job.setInputFormatClass(HCatInputFormat.class); + job.setJarByClass(WriteText.class); + job.setMapperClass(Map.class); + job.setOutputKeyClass(WritableComparable.class); + job.setOutputValueClass(DefaultHCatRecord.class); + job.setNumReduceTasks(0); + 
HCatOutputFormat.setOutput(job, OutputJobInfo.create(dbName, + outputTableName, null)); + HCatSchema s = HCatInputFormat.getTableSchema(job); + System.err.println("INFO: output schema explicitly set for writing:" + + s); + HCatOutputFormat.setSchema(job, s); + job.setOutputFormatClass(HCatOutputFormat.class); + return (job.waitForCompletion(true) ? 0 : 1); + } + + public static void main(String[] args) throws Exception { + int exitCode = ToolRunner.run(new WriteText(), args); + System.exit(exitCode); + } +} diff --git hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/WriteTextPartitioned.java hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/WriteTextPartitioned.java new file mode 100644 index 0000000..b7e1549 --- /dev/null +++ hcatalog/src/test/e2e/hcatalog/udfs/java/org/apache/hive/hcatalog/utils/WriteTextPartitioned.java @@ -0,0 +1,133 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hive.hcatalog.utils; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.io.WritableComparable; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.util.GenericOptionsParser; +import org.apache.hadoop.util.Tool; +import org.apache.hadoop.util.ToolRunner; +import org.apache.hive.hcatalog.common.HCatConstants; +import org.apache.hive.hcatalog.data.DefaultHCatRecord; +import org.apache.hive.hcatalog.data.HCatRecord; +import org.apache.hive.hcatalog.data.schema.HCatSchema; +import org.apache.hive.hcatalog.data.schema.HCatFieldSchema; +import org.apache.hive.hcatalog.mapreduce.HCatInputFormat; +import org.apache.hive.hcatalog.mapreduce.HCatOutputFormat; +import org.apache.hive.hcatalog.mapreduce.InputJobInfo; +import org.apache.hive.hcatalog.mapreduce.OutputJobInfo; + +/** + * This is a map reduce test for testing hcat writing to partitioned tables. + * table. It performs a group by on the first column and a SUM operation on the + * other columns. This is to simulate a typical operation in a map reduce + * program to test that hcat hands the right data to the map reduce program + * + * Usage: hadoop jar org.apache.hive.hcatalog.utils.HBaseReadWrite -libjars + * <hcat_jar> * <serveruri> <input_tablename> <output_tablename> [filter] + * If filter is given it will be provided as the partition to write to. 
+ */ +public class WriteTextPartitioned extends Configured implements Tool { + + static String filter = null; + + public static class Map extends + Mapper { + + @Override + protected void map( + WritableComparable key, + HCatRecord value, + org.apache.hadoop.mapreduce.Mapper.Context context) + throws IOException, InterruptedException { + String name = (String) value.get(0); + int age = (Integer) value.get(1); + String ds = (String) value.get(3); + + HCatRecord record = (filter == null ? new DefaultHCatRecord(3) : new DefaultHCatRecord(2)); + record.set(0, name); + record.set(1, age); + if (filter == null) record.set(2, ds); + + context.write(null, record); + + } + } + + public int run(String[] args) throws Exception { + Configuration conf = getConf(); + args = new GenericOptionsParser(conf, args).getRemainingArgs(); + + String serverUri = args[0]; + String inputTableName = args[1]; + String outputTableName = args[2]; + if (args.length > 3) filter = args[3]; + String dbName = null; + + String principalID = System + .getProperty(HCatConstants.HCAT_METASTORE_PRINCIPAL); + if (principalID != null) + conf.set(HCatConstants.HCAT_METASTORE_PRINCIPAL, principalID); + Job job = new Job(conf, "WriteTextPartitioned"); + HCatInputFormat.setInput(job, InputJobInfo.create(dbName, + inputTableName, filter)); + // initialize HCatOutputFormat + + job.setInputFormatClass(HCatInputFormat.class); + job.setJarByClass(WriteTextPartitioned.class); + job.setMapperClass(Map.class); + job.setOutputKeyClass(WritableComparable.class); + job.setOutputValueClass(DefaultHCatRecord.class); + job.setNumReduceTasks(0); + + java.util.Map partitionVals = null; + if (filter != null) { + String[] s = filter.split("="); + String val = s[1].replace('"', ' ').trim(); + partitionVals = new HashMap(1); + partitionVals.put(s[0], val); + } + HCatOutputFormat.setOutput(job, OutputJobInfo.create(dbName, + outputTableName, partitionVals)); + HCatSchema s = HCatInputFormat.getTableSchema(job); + // Build the 
schema for this table, which is slightly different than the + // schema for the input table + List fss = new ArrayList(3); + fss.add(s.get(0)); + fss.add(s.get(1)); + fss.add(s.get(3)); + HCatOutputFormat.setSchema(job, new HCatSchema(fss)); + job.setOutputFormatClass(HCatOutputFormat.class); + return (job.waitForCompletion(true) ? 0 : 1); + } + + public static void main(String[] args) throws Exception { + int exitCode = ToolRunner.run(new WriteTextPartitioned(), args); + System.exit(exitCode); + } +} diff --git hcatalog/storage-handlers/hbase/if/transaction.thrift hcatalog/storage-handlers/hbase/if/transaction.thrift index a74ac3d..a9e697e 100644 --- hcatalog/storage-handlers/hbase/if/transaction.thrift +++ hcatalog/storage-handlers/hbase/if/transaction.thrift @@ -17,7 +17,7 @@ * under the License. */ -namespace java org.apache.hcatalog.hbase.snapshot.transaction.thrift +namespace java org.apache.hive.hcatalog.hbase.snapshot.transaction.thrift namespace cpp Apache.HCatalog.HBase struct StoreFamilyRevision { diff --git hcatalog/storage-handlers/hbase/pom.xml hcatalog/storage-handlers/hbase/pom.xml index 7806c8d..2fc160f 100644 --- hcatalog/storage-handlers/hbase/pom.xml +++ hcatalog/storage-handlers/hbase/pom.xml @@ -22,14 +22,13 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"> - org.apache.hcatalog + org.apache.hive.hcatalog hcatalog 0.12.0-SNAPSHOT ../../pom.xml 4.0.0 - org.apache.hcatalog hbase-storage-handler jar hbase-storage-handler @@ -49,7 +48,7 @@ - org.apache.hcatalog + org.apache.hive.hcatalog hcatalog-core ${hcatalog.version} compile diff --git hcatalog/storage-handlers/hbase/src/gen-java/org/apache/hcatalog/hbase/snapshot/transaction/thrift/StoreFamilyRevision.java hcatalog/storage-handlers/hbase/src/gen-java/org/apache/hcatalog/hbase/snapshot/transaction/thrift/StoreFamilyRevision.java deleted file mode 100644 index a5d8213..0000000 --- 
hcatalog/storage-handlers/hbase/src/gen-java/org/apache/hcatalog/hbase/snapshot/transaction/thrift/StoreFamilyRevision.java +++ /dev/null @@ -1,416 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/** - * This class is used to store the revision and timestamp of a column family - * in a transaction. 
- * - */ -/** - * Autogenerated by Thrift Compiler (0.7.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - */ -package org.apache.hcatalog.hbase.snapshot.transaction.thrift; - -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; - -public class StoreFamilyRevision implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("StoreFamilyRevision"); - - private static final org.apache.thrift.protocol.TField REVISION_FIELD_DESC = new org.apache.thrift.protocol.TField("revision", org.apache.thrift.protocol.TType.I64, (short) 1); - private static final org.apache.thrift.protocol.TField TIMESTAMP_FIELD_DESC = new org.apache.thrift.protocol.TField("timestamp", org.apache.thrift.protocol.TType.I64, (short) 2); - - public long revision; // required - public long timestamp; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REVISION((short) 1, "revision"), - TIMESTAMP((short) 2, "timestamp"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch (fieldId) { - case 1: // REVISION - return REVISION; - case 2: // TIMESTAMP - return TIMESTAMP; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. 
- */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __REVISION_ISSET_ID = 0; - private static final int __TIMESTAMP_ISSET_ID = 1; - private BitSet __isset_bit_vector = new BitSet(2); - - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REVISION, new org.apache.thrift.meta_data.FieldMetaData("revision", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64))); - tmpMap.put(_Fields.TIMESTAMP, new org.apache.thrift.meta_data.FieldMetaData("timestamp", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(StoreFamilyRevision.class, metaDataMap); - } - - public StoreFamilyRevision() { - } - - public StoreFamilyRevision( - long revision, - long timestamp) { - this(); - this.revision = revision; - setRevisionIsSet(true); - this.timestamp = timestamp; - setTimestampIsSet(true); - } - 
- /** - * Performs a deep copy on other. - */ - public StoreFamilyRevision(StoreFamilyRevision other) { - __isset_bit_vector.clear(); - __isset_bit_vector.or(other.__isset_bit_vector); - this.revision = other.revision; - this.timestamp = other.timestamp; - } - - public StoreFamilyRevision deepCopy() { - return new StoreFamilyRevision(this); - } - - @Override - public void clear() { - setRevisionIsSet(false); - this.revision = 0; - setTimestampIsSet(false); - this.timestamp = 0; - } - - public long getRevision() { - return this.revision; - } - - public StoreFamilyRevision setRevision(long revision) { - this.revision = revision; - setRevisionIsSet(true); - return this; - } - - public void unsetRevision() { - __isset_bit_vector.clear(__REVISION_ISSET_ID); - } - - /** Returns true if field revision is set (has been assigned a value) and false otherwise */ - public boolean isSetRevision() { - return __isset_bit_vector.get(__REVISION_ISSET_ID); - } - - public void setRevisionIsSet(boolean value) { - __isset_bit_vector.set(__REVISION_ISSET_ID, value); - } - - public long getTimestamp() { - return this.timestamp; - } - - public StoreFamilyRevision setTimestamp(long timestamp) { - this.timestamp = timestamp; - setTimestampIsSet(true); - return this; - } - - public void unsetTimestamp() { - __isset_bit_vector.clear(__TIMESTAMP_ISSET_ID); - } - - /** Returns true if field timestamp is set (has been assigned a value) and false otherwise */ - public boolean isSetTimestamp() { - return __isset_bit_vector.get(__TIMESTAMP_ISSET_ID); - } - - public void setTimestampIsSet(boolean value) { - __isset_bit_vector.set(__TIMESTAMP_ISSET_ID, value); - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REVISION: - if (value == null) { - unsetRevision(); - } else { - setRevision((Long) value); - } - break; - - case TIMESTAMP: - if (value == null) { - unsetTimestamp(); - } else { - setTimestamp((Long) value); - } - break; - - } - } - - public Object 
getFieldValue(_Fields field) { - switch (field) { - case REVISION: - return Long.valueOf(getRevision()); - - case TIMESTAMP: - return Long.valueOf(getTimestamp()); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REVISION: - return isSetRevision(); - case TIMESTAMP: - return isSetTimestamp(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof StoreFamilyRevision) - return this.equals((StoreFamilyRevision) that); - return false; - } - - public boolean equals(StoreFamilyRevision that) { - if (that == null) - return false; - - boolean this_present_revision = true; - boolean that_present_revision = true; - if (this_present_revision || that_present_revision) { - if (!(this_present_revision && that_present_revision)) - return false; - if (this.revision != that.revision) - return false; - } - - boolean this_present_timestamp = true; - boolean that_present_timestamp = true; - if (this_present_timestamp || that_present_timestamp) { - if (!(this_present_timestamp && that_present_timestamp)) - return false; - if (this.timestamp != that.timestamp) - return false; - } - - return true; - } - - @Override - public int hashCode() { - return 0; - } - - public int compareTo(StoreFamilyRevision other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - StoreFamilyRevision typedOther = (StoreFamilyRevision) other; - - lastComparison = Boolean.valueOf(isSetRevision()).compareTo(typedOther.isSetRevision()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetRevision()) { - lastComparison = 
org.apache.thrift.TBaseHelper.compareTo(this.revision, typedOther.revision); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetTimestamp()).compareTo(typedOther.isSetTimestamp()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetTimestamp()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.timestamp, typedOther.timestamp); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField field; - iprot.readStructBegin(); - while (true) { - field = iprot.readFieldBegin(); - if (field.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (field.id) { - case 1: // REVISION - if (field.type == org.apache.thrift.protocol.TType.I64) { - this.revision = iprot.readI64(); - setRevisionIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - } - break; - case 2: // TIMESTAMP - if (field.type == org.apache.thrift.protocol.TType.I64) { - this.timestamp = iprot.readI64(); - setTimestampIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - - // check for required fields of primitive type, which can't be checked in the validate method - validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - validate(); - - oprot.writeStructBegin(STRUCT_DESC); - oprot.writeFieldBegin(REVISION_FIELD_DESC); - oprot.writeI64(this.revision); - oprot.writeFieldEnd(); - oprot.writeFieldBegin(TIMESTAMP_FIELD_DESC); - oprot.writeI64(this.timestamp); - oprot.writeFieldEnd(); - 
oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("StoreFamilyRevision("); - boolean first = true; - - sb.append("revision:"); - sb.append(this.revision); - first = false; - if (!first) sb.append(", "); - sb.append("timestamp:"); - sb.append(this.timestamp); - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. - __isset_bit_vector = new BitSet(1); - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - -} - diff --git hcatalog/storage-handlers/hbase/src/gen-java/org/apache/hcatalog/hbase/snapshot/transaction/thrift/StoreFamilyRevisionList.java hcatalog/storage-handlers/hbase/src/gen-java/org/apache/hcatalog/hbase/snapshot/transaction/thrift/StoreFamilyRevisionList.java deleted file mode 100644 index 0f661cb..0000000 --- hcatalog/storage-handlers/hbase/src/gen-java/org/apache/hcatalog/hbase/snapshot/transaction/thrift/StoreFamilyRevisionList.java +++ /dev/null @@ -1,369 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/** - * This class is used to store a list of StoreFamilyRevision for a column - * family in zookeeper. - * - */ -/** - * Autogenerated by Thrift Compiler (0.7.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - */ -package org.apache.hcatalog.hbase.snapshot.transaction.thrift; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.EnumMap; -import java.util.EnumSet; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -public class StoreFamilyRevisionList implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("StoreFamilyRevisionList"); - - private static final org.apache.thrift.protocol.TField REVISION_LIST_FIELD_DESC = new org.apache.thrift.protocol.TField("revisionList", org.apache.thrift.protocol.TType.LIST, (short) 1); - - public List revisionList; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REVISION_LIST((short) 1, "revisionList"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch (fieldId) { - case 1: // REVISION_LIST - return REVISION_LIST; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REVISION_LIST, new org.apache.thrift.meta_data.FieldMetaData("revisionList", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, StoreFamilyRevision.class)))); - metaDataMap = 
Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(StoreFamilyRevisionList.class, metaDataMap); - } - - public StoreFamilyRevisionList() { - } - - public StoreFamilyRevisionList( - List revisionList) { - this(); - this.revisionList = revisionList; - } - - /** - * Performs a deep copy on other. - */ - public StoreFamilyRevisionList(StoreFamilyRevisionList other) { - if (other.isSetRevisionList()) { - List __this__revisionList = new ArrayList(); - for (StoreFamilyRevision other_element : other.revisionList) { - __this__revisionList.add(new StoreFamilyRevision(other_element)); - } - this.revisionList = __this__revisionList; - } - } - - public StoreFamilyRevisionList deepCopy() { - return new StoreFamilyRevisionList(this); - } - - @Override - public void clear() { - this.revisionList = null; - } - - public int getRevisionListSize() { - return (this.revisionList == null) ? 0 : this.revisionList.size(); - } - - public java.util.Iterator getRevisionListIterator() { - return (this.revisionList == null) ? 
null : this.revisionList.iterator(); - } - - public void addToRevisionList(StoreFamilyRevision elem) { - if (this.revisionList == null) { - this.revisionList = new ArrayList(); - } - this.revisionList.add(elem); - } - - public List getRevisionList() { - return this.revisionList; - } - - public StoreFamilyRevisionList setRevisionList(List revisionList) { - this.revisionList = revisionList; - return this; - } - - public void unsetRevisionList() { - this.revisionList = null; - } - - /** Returns true if field revisionList is set (has been assigned a value) and false otherwise */ - public boolean isSetRevisionList() { - return this.revisionList != null; - } - - public void setRevisionListIsSet(boolean value) { - if (!value) { - this.revisionList = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REVISION_LIST: - if (value == null) { - unsetRevisionList(); - } else { - setRevisionList((List) value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REVISION_LIST: - return getRevisionList(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REVISION_LIST: - return isSetRevisionList(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof StoreFamilyRevisionList) - return this.equals((StoreFamilyRevisionList) that); - return false; - } - - public boolean equals(StoreFamilyRevisionList that) { - if (that == null) - return false; - - boolean this_present_revisionList = true && this.isSetRevisionList(); - boolean that_present_revisionList = true && that.isSetRevisionList(); - if (this_present_revisionList || that_present_revisionList) { 
- if (!(this_present_revisionList && that_present_revisionList)) - return false; - if (!this.revisionList.equals(that.revisionList)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - return 0; - } - - public int compareTo(StoreFamilyRevisionList other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - StoreFamilyRevisionList typedOther = (StoreFamilyRevisionList) other; - - lastComparison = Boolean.valueOf(isSetRevisionList()).compareTo(typedOther.isSetRevisionList()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetRevisionList()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.revisionList, typedOther.revisionList); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField field; - iprot.readStructBegin(); - while (true) { - field = iprot.readFieldBegin(); - if (field.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (field.id) { - case 1: // REVISION_LIST - if (field.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list0 = iprot.readListBegin(); - this.revisionList = new ArrayList(_list0.size); - for (int _i1 = 0; _i1 < _list0.size; ++_i1) { - StoreFamilyRevision _elem2; // required - _elem2 = new StoreFamilyRevision(); - _elem2.read(iprot); - this.revisionList.add(_elem2); - } - iprot.readListEnd(); - } - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - - // check for required fields of primitive type, which 
can't be checked in the validate method - validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (this.revisionList != null) { - oprot.writeFieldBegin(REVISION_LIST_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRUCT, this.revisionList.size())); - for (StoreFamilyRevision _iter3 : this.revisionList) { - _iter3.write(oprot); - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("StoreFamilyRevisionList("); - boolean first = true; - - sb.append("revisionList:"); - if (this.revisionList == null) { - sb.append("null"); - } else { - sb.append(this.revisionList); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - -} - diff --git hcatalog/storage-handlers/hbase/src/gen-java/org/apache/hive/hcatalog/hbase/snapshot/transaction/thrift/StoreFamilyRevision.java hcatalog/storage-handlers/hbase/src/gen-java/org/apache/hive/hcatalog/hbase/snapshot/transaction/thrift/StoreFamilyRevision.java new file mode 100644 index 
0000000..6c72030 --- /dev/null +++ hcatalog/storage-handlers/hbase/src/gen-java/org/apache/hive/hcatalog/hbase/snapshot/transaction/thrift/StoreFamilyRevision.java @@ -0,0 +1,416 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * This class is used to store the revision and timestamp of a column family + * in a transaction. 
+ * + */ +/** + * Autogenerated by Thrift Compiler (0.7.0) + * + * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING + */ +package org.apache.hive.hcatalog.hbase.snapshot.transaction.thrift; + +import java.util.Map; +import java.util.HashMap; +import java.util.EnumMap; +import java.util.EnumSet; +import java.util.Collections; +import java.util.BitSet; + +public class StoreFamilyRevision implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { + private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("StoreFamilyRevision"); + + private static final org.apache.thrift.protocol.TField REVISION_FIELD_DESC = new org.apache.thrift.protocol.TField("revision", org.apache.thrift.protocol.TType.I64, (short) 1); + private static final org.apache.thrift.protocol.TField TIMESTAMP_FIELD_DESC = new org.apache.thrift.protocol.TField("timestamp", org.apache.thrift.protocol.TType.I64, (short) 2); + + public long revision; // required + public long timestamp; // required + + /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ + public enum _Fields implements org.apache.thrift.TFieldIdEnum { + REVISION((short) 1, "revision"), + TIMESTAMP((short) 2, "timestamp"); + + private static final Map byName = new HashMap(); + + static { + for (_Fields field : EnumSet.allOf(_Fields.class)) { + byName.put(field.getFieldName(), field); + } + } + + /** + * Find the _Fields constant that matches fieldId, or null if its not found. + */ + public static _Fields findByThriftId(int fieldId) { + switch (fieldId) { + case 1: // REVISION + return REVISION; + case 2: // TIMESTAMP + return TIMESTAMP; + default: + return null; + } + } + + /** + * Find the _Fields constant that matches fieldId, throwing an exception + * if it is not found. 
+ */ + public static _Fields findByThriftIdOrThrow(int fieldId) { + _Fields fields = findByThriftId(fieldId); + if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); + return fields; + } + + /** + * Find the _Fields constant that matches name, or null if its not found. + */ + public static _Fields findByName(String name) { + return byName.get(name); + } + + private final short _thriftId; + private final String _fieldName; + + _Fields(short thriftId, String fieldName) { + _thriftId = thriftId; + _fieldName = fieldName; + } + + public short getThriftFieldId() { + return _thriftId; + } + + public String getFieldName() { + return _fieldName; + } + } + + // isset id assignments + private static final int __REVISION_ISSET_ID = 0; + private static final int __TIMESTAMP_ISSET_ID = 1; + private BitSet __isset_bit_vector = new BitSet(2); + + public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; + + static { + Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); + tmpMap.put(_Fields.REVISION, new org.apache.thrift.meta_data.FieldMetaData("revision", org.apache.thrift.TFieldRequirementType.DEFAULT, + new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64))); + tmpMap.put(_Fields.TIMESTAMP, new org.apache.thrift.meta_data.FieldMetaData("timestamp", org.apache.thrift.TFieldRequirementType.DEFAULT, + new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64))); + metaDataMap = Collections.unmodifiableMap(tmpMap); + org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(StoreFamilyRevision.class, metaDataMap); + } + + public StoreFamilyRevision() { + } + + public StoreFamilyRevision( + long revision, + long timestamp) { + this(); + this.revision = revision; + setRevisionIsSet(true); + this.timestamp = timestamp; + setTimestampIsSet(true); + } + 
+ /** + * Performs a deep copy on other. + */ + public StoreFamilyRevision(StoreFamilyRevision other) { + __isset_bit_vector.clear(); + __isset_bit_vector.or(other.__isset_bit_vector); + this.revision = other.revision; + this.timestamp = other.timestamp; + } + + public StoreFamilyRevision deepCopy() { + return new StoreFamilyRevision(this); + } + + @Override + public void clear() { + setRevisionIsSet(false); + this.revision = 0; + setTimestampIsSet(false); + this.timestamp = 0; + } + + public long getRevision() { + return this.revision; + } + + public StoreFamilyRevision setRevision(long revision) { + this.revision = revision; + setRevisionIsSet(true); + return this; + } + + public void unsetRevision() { + __isset_bit_vector.clear(__REVISION_ISSET_ID); + } + + /** Returns true if field revision is set (has been assigned a value) and false otherwise */ + public boolean isSetRevision() { + return __isset_bit_vector.get(__REVISION_ISSET_ID); + } + + public void setRevisionIsSet(boolean value) { + __isset_bit_vector.set(__REVISION_ISSET_ID, value); + } + + public long getTimestamp() { + return this.timestamp; + } + + public StoreFamilyRevision setTimestamp(long timestamp) { + this.timestamp = timestamp; + setTimestampIsSet(true); + return this; + } + + public void unsetTimestamp() { + __isset_bit_vector.clear(__TIMESTAMP_ISSET_ID); + } + + /** Returns true if field timestamp is set (has been assigned a value) and false otherwise */ + public boolean isSetTimestamp() { + return __isset_bit_vector.get(__TIMESTAMP_ISSET_ID); + } + + public void setTimestampIsSet(boolean value) { + __isset_bit_vector.set(__TIMESTAMP_ISSET_ID, value); + } + + public void setFieldValue(_Fields field, Object value) { + switch (field) { + case REVISION: + if (value == null) { + unsetRevision(); + } else { + setRevision((Long) value); + } + break; + + case TIMESTAMP: + if (value == null) { + unsetTimestamp(); + } else { + setTimestamp((Long) value); + } + break; + + } + } + + public Object 
getFieldValue(_Fields field) { + switch (field) { + case REVISION: + return Long.valueOf(getRevision()); + + case TIMESTAMP: + return Long.valueOf(getTimestamp()); + + } + throw new IllegalStateException(); + } + + /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ + public boolean isSet(_Fields field) { + if (field == null) { + throw new IllegalArgumentException(); + } + + switch (field) { + case REVISION: + return isSetRevision(); + case TIMESTAMP: + return isSetTimestamp(); + } + throw new IllegalStateException(); + } + + @Override + public boolean equals(Object that) { + if (that == null) + return false; + if (that instanceof StoreFamilyRevision) + return this.equals((StoreFamilyRevision) that); + return false; + } + + public boolean equals(StoreFamilyRevision that) { + if (that == null) + return false; + + boolean this_present_revision = true; + boolean that_present_revision = true; + if (this_present_revision || that_present_revision) { + if (!(this_present_revision && that_present_revision)) + return false; + if (this.revision != that.revision) + return false; + } + + boolean this_present_timestamp = true; + boolean that_present_timestamp = true; + if (this_present_timestamp || that_present_timestamp) { + if (!(this_present_timestamp && that_present_timestamp)) + return false; + if (this.timestamp != that.timestamp) + return false; + } + + return true; + } + + @Override + public int hashCode() { + return 0; + } + + public int compareTo(StoreFamilyRevision other) { + if (!getClass().equals(other.getClass())) { + return getClass().getName().compareTo(other.getClass().getName()); + } + + int lastComparison = 0; + StoreFamilyRevision typedOther = (StoreFamilyRevision) other; + + lastComparison = Boolean.valueOf(isSetRevision()).compareTo(typedOther.isSetRevision()); + if (lastComparison != 0) { + return lastComparison; + } + if (isSetRevision()) { + lastComparison = 
org.apache.thrift.TBaseHelper.compareTo(this.revision, typedOther.revision); + if (lastComparison != 0) { + return lastComparison; + } + } + lastComparison = Boolean.valueOf(isSetTimestamp()).compareTo(typedOther.isSetTimestamp()); + if (lastComparison != 0) { + return lastComparison; + } + if (isSetTimestamp()) { + lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.timestamp, typedOther.timestamp); + if (lastComparison != 0) { + return lastComparison; + } + } + return 0; + } + + public _Fields fieldForId(int fieldId) { + return _Fields.findByThriftId(fieldId); + } + + public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { + org.apache.thrift.protocol.TField field; + iprot.readStructBegin(); + while (true) { + field = iprot.readFieldBegin(); + if (field.type == org.apache.thrift.protocol.TType.STOP) { + break; + } + switch (field.id) { + case 1: // REVISION + if (field.type == org.apache.thrift.protocol.TType.I64) { + this.revision = iprot.readI64(); + setRevisionIsSet(true); + } else { + org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); + } + break; + case 2: // TIMESTAMP + if (field.type == org.apache.thrift.protocol.TType.I64) { + this.timestamp = iprot.readI64(); + setTimestampIsSet(true); + } else { + org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); + } + break; + default: + org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); + } + iprot.readFieldEnd(); + } + iprot.readStructEnd(); + + // check for required fields of primitive type, which can't be checked in the validate method + validate(); + } + + public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { + validate(); + + oprot.writeStructBegin(STRUCT_DESC); + oprot.writeFieldBegin(REVISION_FIELD_DESC); + oprot.writeI64(this.revision); + oprot.writeFieldEnd(); + oprot.writeFieldBegin(TIMESTAMP_FIELD_DESC); + oprot.writeI64(this.timestamp); + oprot.writeFieldEnd(); + 
oprot.writeFieldStop(); + oprot.writeStructEnd(); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder("StoreFamilyRevision("); + boolean first = true; + + sb.append("revision:"); + sb.append(this.revision); + first = false; + if (!first) sb.append(", "); + sb.append("timestamp:"); + sb.append(this.timestamp); + first = false; + sb.append(")"); + return sb.toString(); + } + + public void validate() throws org.apache.thrift.TException { + // check for required fields + } + + private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { + try { + write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); + } catch (org.apache.thrift.TException te) { + throw new java.io.IOException(te); + } + } + + private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { + try { + // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. + __isset_bit_vector = new BitSet(1); + read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); + } catch (org.apache.thrift.TException te) { + throw new java.io.IOException(te); + } + } + +} + diff --git hcatalog/storage-handlers/hbase/src/gen-java/org/apache/hive/hcatalog/hbase/snapshot/transaction/thrift/StoreFamilyRevisionList.java hcatalog/storage-handlers/hbase/src/gen-java/org/apache/hive/hcatalog/hbase/snapshot/transaction/thrift/StoreFamilyRevisionList.java new file mode 100644 index 0000000..9b04a64 --- /dev/null +++ hcatalog/storage-handlers/hbase/src/gen-java/org/apache/hive/hcatalog/hbase/snapshot/transaction/thrift/StoreFamilyRevisionList.java @@ -0,0 +1,369 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * This class is used to store a list of StoreFamilyRevision for a column + * family in zookeeper. + * + */ +/** + * Autogenerated by Thrift Compiler (0.7.0) + * + * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING + */ +package org.apache.hive.hcatalog.hbase.snapshot.transaction.thrift; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.EnumMap; +import java.util.EnumSet; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +public class StoreFamilyRevisionList implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { + private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("StoreFamilyRevisionList"); + + private static final org.apache.thrift.protocol.TField REVISION_LIST_FIELD_DESC = new org.apache.thrift.protocol.TField("revisionList", org.apache.thrift.protocol.TType.LIST, (short) 1); + + public List revisionList; // required + + /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ + public enum _Fields implements org.apache.thrift.TFieldIdEnum { + REVISION_LIST((short) 1, "revisionList"); + + private static final Map byName = new HashMap(); + + static { + for (_Fields field : EnumSet.allOf(_Fields.class)) { + byName.put(field.getFieldName(), field); + } + } + + /** + * Find the _Fields constant that matches fieldId, or null if its not found. + */ + public static _Fields findByThriftId(int fieldId) { + switch (fieldId) { + case 1: // REVISION_LIST + return REVISION_LIST; + default: + return null; + } + } + + /** + * Find the _Fields constant that matches fieldId, throwing an exception + * if it is not found. + */ + public static _Fields findByThriftIdOrThrow(int fieldId) { + _Fields fields = findByThriftId(fieldId); + if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); + return fields; + } + + /** + * Find the _Fields constant that matches name, or null if its not found. + */ + public static _Fields findByName(String name) { + return byName.get(name); + } + + private final short _thriftId; + private final String _fieldName; + + _Fields(short thriftId, String fieldName) { + _thriftId = thriftId; + _fieldName = fieldName; + } + + public short getThriftFieldId() { + return _thriftId; + } + + public String getFieldName() { + return _fieldName; + } + } + + // isset id assignments + + public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; + + static { + Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); + tmpMap.put(_Fields.REVISION_LIST, new org.apache.thrift.meta_data.FieldMetaData("revisionList", org.apache.thrift.TFieldRequirementType.DEFAULT, + new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, + new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, StoreFamilyRevision.class)))); + metaDataMap = 
Collections.unmodifiableMap(tmpMap); + org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(StoreFamilyRevisionList.class, metaDataMap); + } + + public StoreFamilyRevisionList() { + } + + public StoreFamilyRevisionList( + List revisionList) { + this(); + this.revisionList = revisionList; + } + + /** + * Performs a deep copy on other. + */ + public StoreFamilyRevisionList(StoreFamilyRevisionList other) { + if (other.isSetRevisionList()) { + List __this__revisionList = new ArrayList(); + for (StoreFamilyRevision other_element : other.revisionList) { + __this__revisionList.add(new StoreFamilyRevision(other_element)); + } + this.revisionList = __this__revisionList; + } + } + + public StoreFamilyRevisionList deepCopy() { + return new StoreFamilyRevisionList(this); + } + + @Override + public void clear() { + this.revisionList = null; + } + + public int getRevisionListSize() { + return (this.revisionList == null) ? 0 : this.revisionList.size(); + } + + public java.util.Iterator getRevisionListIterator() { + return (this.revisionList == null) ? 
null : this.revisionList.iterator(); + } + + public void addToRevisionList(StoreFamilyRevision elem) { + if (this.revisionList == null) { + this.revisionList = new ArrayList(); + } + this.revisionList.add(elem); + } + + public List getRevisionList() { + return this.revisionList; + } + + public StoreFamilyRevisionList setRevisionList(List revisionList) { + this.revisionList = revisionList; + return this; + } + + public void unsetRevisionList() { + this.revisionList = null; + } + + /** Returns true if field revisionList is set (has been assigned a value) and false otherwise */ + public boolean isSetRevisionList() { + return this.revisionList != null; + } + + public void setRevisionListIsSet(boolean value) { + if (!value) { + this.revisionList = null; + } + } + + public void setFieldValue(_Fields field, Object value) { + switch (field) { + case REVISION_LIST: + if (value == null) { + unsetRevisionList(); + } else { + setRevisionList((List) value); + } + break; + + } + } + + public Object getFieldValue(_Fields field) { + switch (field) { + case REVISION_LIST: + return getRevisionList(); + + } + throw new IllegalStateException(); + } + + /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ + public boolean isSet(_Fields field) { + if (field == null) { + throw new IllegalArgumentException(); + } + + switch (field) { + case REVISION_LIST: + return isSetRevisionList(); + } + throw new IllegalStateException(); + } + + @Override + public boolean equals(Object that) { + if (that == null) + return false; + if (that instanceof StoreFamilyRevisionList) + return this.equals((StoreFamilyRevisionList) that); + return false; + } + + public boolean equals(StoreFamilyRevisionList that) { + if (that == null) + return false; + + boolean this_present_revisionList = true && this.isSetRevisionList(); + boolean that_present_revisionList = true && that.isSetRevisionList(); + if (this_present_revisionList || that_present_revisionList) { 
+ if (!(this_present_revisionList && that_present_revisionList)) + return false; + if (!this.revisionList.equals(that.revisionList)) + return false; + } + + return true; + } + + @Override + public int hashCode() { + return 0; + } + + public int compareTo(StoreFamilyRevisionList other) { + if (!getClass().equals(other.getClass())) { + return getClass().getName().compareTo(other.getClass().getName()); + } + + int lastComparison = 0; + StoreFamilyRevisionList typedOther = (StoreFamilyRevisionList) other; + + lastComparison = Boolean.valueOf(isSetRevisionList()).compareTo(typedOther.isSetRevisionList()); + if (lastComparison != 0) { + return lastComparison; + } + if (isSetRevisionList()) { + lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.revisionList, typedOther.revisionList); + if (lastComparison != 0) { + return lastComparison; + } + } + return 0; + } + + public _Fields fieldForId(int fieldId) { + return _Fields.findByThriftId(fieldId); + } + + public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { + org.apache.thrift.protocol.TField field; + iprot.readStructBegin(); + while (true) { + field = iprot.readFieldBegin(); + if (field.type == org.apache.thrift.protocol.TType.STOP) { + break; + } + switch (field.id) { + case 1: // REVISION_LIST + if (field.type == org.apache.thrift.protocol.TType.LIST) { + { + org.apache.thrift.protocol.TList _list0 = iprot.readListBegin(); + this.revisionList = new ArrayList(_list0.size); + for (int _i1 = 0; _i1 < _list0.size; ++_i1) { + StoreFamilyRevision _elem2; // required + _elem2 = new StoreFamilyRevision(); + _elem2.read(iprot); + this.revisionList.add(_elem2); + } + iprot.readListEnd(); + } + } else { + org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); + } + break; + default: + org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); + } + iprot.readFieldEnd(); + } + iprot.readStructEnd(); + + // check for required fields of primitive type, which 
can't be checked in the validate method + validate(); + } + + public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { + validate(); + + oprot.writeStructBegin(STRUCT_DESC); + if (this.revisionList != null) { + oprot.writeFieldBegin(REVISION_LIST_FIELD_DESC); + { + oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRUCT, this.revisionList.size())); + for (StoreFamilyRevision _iter3 : this.revisionList) { + _iter3.write(oprot); + } + oprot.writeListEnd(); + } + oprot.writeFieldEnd(); + } + oprot.writeFieldStop(); + oprot.writeStructEnd(); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder("StoreFamilyRevisionList("); + boolean first = true; + + sb.append("revisionList:"); + if (this.revisionList == null) { + sb.append("null"); + } else { + sb.append(this.revisionList); + } + first = false; + sb.append(")"); + return sb.toString(); + } + + public void validate() throws org.apache.thrift.TException { + // check for required fields + } + + private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { + try { + write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); + } catch (org.apache.thrift.TException te) { + throw new java.io.IOException(te); + } + } + + private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { + try { + read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); + } catch (org.apache.thrift.TException te) { + throw new java.io.IOException(te); + } + } + +} + diff --git hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseAuthorizationProvider.java hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseAuthorizationProvider.java deleted file mode 100644 index ee80389..0000000 --- 
hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseAuthorizationProvider.java +++ /dev/null @@ -1,144 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.hbase; - -import java.util.List; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.metastore.api.Database; -import org.apache.hadoop.hive.ql.metadata.AuthorizationException; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.metadata.Partition; -import org.apache.hadoop.hive.ql.metadata.Table; -import org.apache.hadoop.hive.ql.security.HiveAuthenticationProvider; -import org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider; -import org.apache.hadoop.hive.ql.security.authorization.Privilege; - -/** - * This class is an implementation of HiveAuthorizationProvider to provide - * authorization functionality for HBase tables. 
- */ -class HBaseAuthorizationProvider implements HiveAuthorizationProvider { - - @Override - public Configuration getConf() { - return null; - } - - @Override - public void setConf(Configuration conf) { - } - - /* - * (non-Javadoc) - * - * @see - * org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider - * #init(org.apache.hadoop.conf.Configuration) - */ - @Override - public void init(Configuration conf) throws HiveException { - } - - @Override - public HiveAuthenticationProvider getAuthenticator() { - return null; - } - - @Override - public void setAuthenticator(HiveAuthenticationProvider authenticator) { - } - - /* - * (non-Javadoc) - * - * @see - * org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider - * #authorize(org.apache.hadoop.hive.ql.security.authorization.Privilege[], - * org.apache.hadoop.hive.ql.security.authorization.Privilege[]) - */ - @Override - public void authorize(Privilege[] readRequiredPriv, - Privilege[] writeRequiredPriv) throws HiveException, - AuthorizationException { - } - - /* - * (non-Javadoc) - * - * @see - * org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider - * #authorize(org.apache.hadoop.hive.metastore.api.Database, - * org.apache.hadoop.hive.ql.security.authorization.Privilege[], - * org.apache.hadoop.hive.ql.security.authorization.Privilege[]) - */ - @Override - public void authorize(Database db, Privilege[] readRequiredPriv, - Privilege[] writeRequiredPriv) throws HiveException, - AuthorizationException { - } - - /* - * (non-Javadoc) - * - * @see - * org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider - * #authorize(org.apache.hadoop.hive.ql.metadata.Table, - * org.apache.hadoop.hive.ql.security.authorization.Privilege[], - * org.apache.hadoop.hive.ql.security.authorization.Privilege[]) - */ - @Override - public void authorize(Table table, Privilege[] readRequiredPriv, - Privilege[] writeRequiredPriv) throws HiveException, - 
AuthorizationException { - } - - /* - * (non-Javadoc) - * - * @see - * org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider - * #authorize(org.apache.hadoop.hive.ql.metadata.Partition, - * org.apache.hadoop.hive.ql.security.authorization.Privilege[], - * org.apache.hadoop.hive.ql.security.authorization.Privilege[]) - */ - @Override - public void authorize(Partition part, Privilege[] readRequiredPriv, - Privilege[] writeRequiredPriv) throws HiveException, - AuthorizationException { - } - - /* - * (non-Javadoc) - * - * @see - * org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider - * #authorize(org.apache.hadoop.hive.ql.metadata.Table, - * org.apache.hadoop.hive.ql.metadata.Partition, java.util.List, - * org.apache.hadoop.hive.ql.security.authorization.Privilege[], - * org.apache.hadoop.hive.ql.security.authorization.Privilege[]) - */ - @Override - public void authorize(Table table, Partition part, List columns, - Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) - throws HiveException, AuthorizationException { - } - -} diff --git hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseBaseOutputFormat.java hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseBaseOutputFormat.java deleted file mode 100644 index 7332af4..0000000 --- hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseBaseOutputFormat.java +++ /dev/null @@ -1,76 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.hbase; - -import java.io.IOException; -import java.util.Properties; - -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hbase.client.Put; -import org.apache.hadoop.hive.ql.io.HiveOutputFormat; -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapred.OutputFormat; -import org.apache.hadoop.mapred.RecordWriter; -import org.apache.hadoop.util.Progressable; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.common.HCatUtil; -import org.apache.hcatalog.mapreduce.OutputJobInfo; - -public class HBaseBaseOutputFormat implements OutputFormat, Put>, - HiveOutputFormat, Put> { - - @Override - public org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter getHiveRecordWriter( - JobConf jc, Path finalOutPath, - Class valueClass, boolean isCompressed, - Properties tableProperties, Progressable progress) - throws IOException { - throw new UnsupportedOperationException("Not implemented"); - } - - @Override - public void checkOutputSpecs(FileSystem ignored, JobConf job) throws IOException { - OutputFormat, Put> outputFormat = getOutputFormat(job); - outputFormat.checkOutputSpecs(ignored, job); - } - - @Override - public RecordWriter, Put> getRecordWriter(FileSystem ignored, - JobConf job, String name, Progressable progress) throws IOException { - OutputFormat, Put> outputFormat = getOutputFormat(job); - return 
outputFormat.getRecordWriter(ignored, job, name, progress); - } - - private OutputFormat, Put> getOutputFormat(JobConf job) - throws IOException { - String outputInfo = job.get(HCatConstants.HCAT_KEY_OUTPUT_INFO); - OutputJobInfo outputJobInfo = (OutputJobInfo) HCatUtil.deserialize(outputInfo); - OutputFormat, Put> outputFormat = null; - if (HBaseHCatStorageHandler.isBulkMode(outputJobInfo)) { - outputFormat = new HBaseBulkOutputFormat(); - } else { - outputFormat = new HBaseDirectOutputFormat(); - } - return outputFormat; - } -} diff --git hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseBulkOutputFormat.java hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseBulkOutputFormat.java deleted file mode 100644 index 4a188e0..0000000 --- hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseBulkOutputFormat.java +++ /dev/null @@ -1,221 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.hbase; - -import java.io.IOException; -import java.util.List; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hbase.KeyValue; -import org.apache.hadoop.hbase.client.Put; -import org.apache.hadoop.hbase.io.ImmutableBytesWritable; -import org.apache.hadoop.hbase.security.User; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.mapred.FileOutputCommitter; -import org.apache.hadoop.mapred.FileOutputFormat; -import org.apache.hadoop.mapred.JobClient; -import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapred.JobContext; -import org.apache.hadoop.mapred.OutputCommitter; -import org.apache.hadoop.mapred.RecordWriter; -import org.apache.hadoop.mapred.Reporter; -import org.apache.hadoop.mapred.SequenceFileOutputFormat; -import org.apache.hadoop.mapred.TaskAttemptContext; -import org.apache.hadoop.util.Progressable; -import org.apache.hcatalog.hbase.snapshot.RevisionManager; - -/** - * Class which imports data into HBase via it's "bulk load" feature. Wherein - * regions are created by the MR job using HFileOutputFormat and then later - * "moved" into the appropriate region server. 
- */ -class HBaseBulkOutputFormat extends HBaseBaseOutputFormat { - - private final static ImmutableBytesWritable EMPTY_LIST = new ImmutableBytesWritable( - new byte[0]); - private SequenceFileOutputFormat, Put> baseOutputFormat; - - public HBaseBulkOutputFormat() { - baseOutputFormat = new SequenceFileOutputFormat, Put>(); - } - - @Override - public void checkOutputSpecs(FileSystem ignored, JobConf job) - throws IOException { - baseOutputFormat.checkOutputSpecs(ignored, job); - HBaseUtil.addHBaseDelegationToken(job); - addJTDelegationToken(job); - } - - @Override - public RecordWriter, Put> getRecordWriter( - FileSystem ignored, JobConf job, String name, Progressable progress) - throws IOException { - job.setOutputKeyClass(ImmutableBytesWritable.class); - job.setOutputValueClass(Put.class); - long version = HBaseRevisionManagerUtil.getOutputRevision(job); - return new HBaseBulkRecordWriter(baseOutputFormat.getRecordWriter( - ignored, job, name, progress), version); - } - - private void addJTDelegationToken(JobConf job) throws IOException { - // Get jobTracker delegation token if security is enabled - // we need to launch the ImportSequenceFile job - if (User.isSecurityEnabled()) { - JobClient jobClient = new JobClient(new JobConf(job)); - try { - job.getCredentials().addToken(new Text("my mr token"), - jobClient.getDelegationToken(null)); - } catch (InterruptedException e) { - throw new IOException("Error while getting JT delegation token", e); - } - } - } - - private static class HBaseBulkRecordWriter implements - RecordWriter, Put> { - - private RecordWriter, Put> baseWriter; - private final Long outputVersion; - - public HBaseBulkRecordWriter( - RecordWriter, Put> baseWriter, - Long outputVersion) { - this.baseWriter = baseWriter; - this.outputVersion = outputVersion; - } - - @Override - public void write(WritableComparable key, Put value) - throws IOException { - Put put = value; - if (outputVersion != null) { - put = new Put(value.getRow(), 
outputVersion.longValue()); - for (List row : value.getFamilyMap().values()) { - for (KeyValue el : row) { - put.add(el.getFamily(), el.getQualifier(), el.getValue()); - } - } - } - // we ignore the key - baseWriter.write(EMPTY_LIST, put); - } - - @Override - public void close(Reporter reporter) throws IOException { - baseWriter.close(reporter); - } - } - - public static class HBaseBulkOutputCommitter extends OutputCommitter { - - private final OutputCommitter baseOutputCommitter; - - public HBaseBulkOutputCommitter() { - baseOutputCommitter = new FileOutputCommitter(); - } - - @Override - public void abortTask(TaskAttemptContext taskContext) - throws IOException { - baseOutputCommitter.abortTask(taskContext); - } - - @Override - public void commitTask(TaskAttemptContext taskContext) - throws IOException { - // baseOutputCommitter.commitTask(taskContext); - } - - @Override - public boolean needsTaskCommit(TaskAttemptContext taskContext) - throws IOException { - return baseOutputCommitter.needsTaskCommit(taskContext); - } - - @Override - public void setupJob(JobContext jobContext) throws IOException { - baseOutputCommitter.setupJob(jobContext); - } - - @Override - public void setupTask(TaskAttemptContext taskContext) - throws IOException { - baseOutputCommitter.setupTask(taskContext); - } - - @Override - public void abortJob(JobContext jobContext, int status) - throws IOException { - baseOutputCommitter.abortJob(jobContext, status); - RevisionManager rm = null; - try { - rm = HBaseRevisionManagerUtil - .getOpenedRevisionManager(jobContext.getConfiguration()); - rm.abortWriteTransaction(HBaseRevisionManagerUtil - .getWriteTransaction(jobContext.getConfiguration())); - } finally { - cleanIntermediate(jobContext); - if (rm != null) - rm.close(); - } - } - - @Override - public void commitJob(JobContext jobContext) throws IOException { - baseOutputCommitter.commitJob(jobContext); - RevisionManager rm = null; - try { - Configuration conf = jobContext.getConfiguration(); - 
Path srcPath = FileOutputFormat.getOutputPath(jobContext.getJobConf()); - if (!FileSystem.get(conf).exists(srcPath)) { - throw new IOException("Failed to bulk import hfiles. " + - "Intermediate data directory is cleaned up or missing. " + - "Please look at the bulk import job if it exists for failure reason"); - } - Path destPath = new Path(srcPath.getParent(), srcPath.getName() + "_hfiles"); - boolean success = ImportSequenceFile.runJob(jobContext, - conf.get(HBaseConstants.PROPERTY_OUTPUT_TABLE_NAME_KEY), - srcPath, - destPath); - if (!success) { - cleanIntermediate(jobContext); - throw new IOException("Failed to bulk import hfiles." + - " Please look at the bulk import job for failure reason"); - } - rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(conf); - rm.commitWriteTransaction(HBaseRevisionManagerUtil.getWriteTransaction(conf)); - cleanIntermediate(jobContext); - } finally { - if (rm != null) - rm.close(); - } - } - - private void cleanIntermediate(JobContext jobContext) - throws IOException { - FileSystem fs = FileSystem.get(jobContext.getConfiguration()); - fs.delete(FileOutputFormat.getOutputPath(jobContext.getJobConf()), true); - } - } -} diff --git hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseConstants.java hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseConstants.java deleted file mode 100644 index 41e62c3..0000000 --- hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseConstants.java +++ /dev/null @@ -1,41 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.hbase; - -import org.apache.hcatalog.common.HCatConstants; - -/** - * Constants class for constants used in HBase storage handler. - */ -class HBaseConstants { - - /** key used to store write transaction object */ - public static final String PROPERTY_WRITE_TXN_KEY = HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX + ".hbase.mapreduce.writeTxn"; - - /** key used to define the name of the table to write to */ - public static final String PROPERTY_OUTPUT_TABLE_NAME_KEY = HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX + ".hbase.mapreduce.outputTableName"; - - /** key used to define whether bulk storage output format will be used or not */ - public static final String PROPERTY_BULK_OUTPUT_MODE_KEY = HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX + ".hbase.output.bulkMode"; - - /** key used to define the hbase table snapshot. */ - public static final String PROPERTY_TABLE_SNAPSHOT_KEY = HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX + "hbase.table.snapshot"; - -} diff --git hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseDirectOutputFormat.java hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseDirectOutputFormat.java deleted file mode 100644 index b7537d4..0000000 --- hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseDirectOutputFormat.java +++ /dev/null @@ -1,167 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.hbase; - -import java.io.IOException; -import java.util.List; - -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.hbase.KeyValue; -import org.apache.hadoop.hbase.client.Put; -import org.apache.hadoop.hbase.mapred.TableOutputFormat; -import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapred.JobContext; -import org.apache.hadoop.mapred.OutputCommitter; -import org.apache.hadoop.mapred.RecordWriter; -import org.apache.hadoop.mapred.Reporter; -import org.apache.hadoop.mapred.TaskAttemptContext; -import org.apache.hadoop.util.Progressable; -import org.apache.hcatalog.hbase.snapshot.RevisionManager; -import org.apache.hcatalog.hbase.snapshot.Transaction; - -/** - * "Direct" implementation of OutputFormat for HBase. Uses HTable client's put - * API to write each row to HBase one a time. Presently it is just using - * TableOutputFormat as the underlying implementation in the future we can tune - * this to make the writes faster such as permanently disabling WAL, caching, - * etc. 
- */ -class HBaseDirectOutputFormat extends HBaseBaseOutputFormat { - - private TableOutputFormat outputFormat; - - public HBaseDirectOutputFormat() { - this.outputFormat = new TableOutputFormat(); - } - - @Override - public RecordWriter, Put> getRecordWriter(FileSystem ignored, - JobConf job, String name, Progressable progress) - throws IOException { - long version = HBaseRevisionManagerUtil.getOutputRevision(job); - return new HBaseDirectRecordWriter(outputFormat.getRecordWriter(ignored, job, name, - progress), version); - } - - @Override - public void checkOutputSpecs(FileSystem ignored, JobConf job) - throws IOException { - outputFormat.checkOutputSpecs(ignored, job); - HBaseUtil.addHBaseDelegationToken(job); - } - - private static class HBaseDirectRecordWriter implements - RecordWriter, Put> { - - private RecordWriter, Put> baseWriter; - private final Long outputVersion; - - public HBaseDirectRecordWriter( - RecordWriter, Put> baseWriter, - Long outputVersion) { - this.baseWriter = baseWriter; - this.outputVersion = outputVersion; - } - - @Override - public void write(WritableComparable key, Put value) - throws IOException { - Put put = value; - if (outputVersion != null) { - put = new Put(value.getRow(), outputVersion.longValue()); - for (List row : value.getFamilyMap().values()) { - for (KeyValue el : row) { - put.add(el.getFamily(), el.getQualifier(), el.getValue()); - } - } - } - baseWriter.write(key, put); - } - - @Override - public void close(Reporter reporter) throws IOException { - baseWriter.close(reporter); - } - - } - - public static class HBaseDirectOutputCommitter extends OutputCommitter { - - public HBaseDirectOutputCommitter() throws IOException { - } - - @Override - public void abortTask(TaskAttemptContext taskContext) - throws IOException { - } - - @Override - public void commitTask(TaskAttemptContext taskContext) - throws IOException { - } - - @Override - public boolean needsTaskCommit(TaskAttemptContext taskContext) - throws IOException { - 
return false; - } - - @Override - public void setupJob(JobContext jobContext) throws IOException { - } - - @Override - public void setupTask(TaskAttemptContext taskContext) - throws IOException { - } - - @Override - public void abortJob(JobContext jobContext, int status) - throws IOException { - super.abortJob(jobContext, status); - RevisionManager rm = null; - try { - rm = HBaseRevisionManagerUtil - .getOpenedRevisionManager(jobContext.getConfiguration()); - Transaction writeTransaction = HBaseRevisionManagerUtil - .getWriteTransaction(jobContext.getConfiguration()); - rm.abortWriteTransaction(writeTransaction); - } finally { - if (rm != null) - rm.close(); - } - } - - @Override - public void commitJob(JobContext jobContext) throws IOException { - RevisionManager rm = null; - try { - rm = HBaseRevisionManagerUtil - .getOpenedRevisionManager(jobContext.getConfiguration()); - rm.commitWriteTransaction(HBaseRevisionManagerUtil.getWriteTransaction(jobContext - .getConfiguration())); - } finally { - if (rm != null) - rm.close(); - } - } - } -} diff --git hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseHCatStorageHandler.java hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseHCatStorageHandler.java deleted file mode 100644 index 31b7741..0000000 --- hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseHCatStorageHandler.java +++ /dev/null @@ -1,610 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.hbase; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Set; - -import org.apache.hadoop.conf.Configurable; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hbase.HBaseConfiguration; -import org.apache.hadoop.hbase.HColumnDescriptor; -import org.apache.hadoop.hbase.HTableDescriptor; -import org.apache.hadoop.hbase.MasterNotRunningException; -import org.apache.hadoop.hbase.ZooKeeperConnectionException; -import org.apache.hadoop.hbase.client.HBaseAdmin; -import org.apache.hadoop.hbase.client.HTable; -import org.apache.hadoop.hbase.mapred.TableOutputFormat; -import org.apache.hadoop.hbase.mapreduce.TableInputFormat; -import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil; -import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hadoop.hive.hbase.HBaseSerDe; -import org.apache.hadoop.hive.metastore.HiveMetaHook; -import org.apache.hadoop.hive.metastore.MetaStoreUtils; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.plan.TableDesc; -import org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider; -import org.apache.hadoop.hive.serde2.SerDe; -import org.apache.hadoop.mapred.InputFormat; -import org.apache.hadoop.mapred.JobConf; -import 
org.apache.hadoop.mapred.OutputFormat; -import org.apache.hadoop.util.StringUtils; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.common.HCatUtil; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.apache.hcatalog.hbase.HBaseBulkOutputFormat.HBaseBulkOutputCommitter; -import org.apache.hcatalog.hbase.HBaseDirectOutputFormat.HBaseDirectOutputCommitter; -import org.apache.hcatalog.hbase.snapshot.RevisionManager; -import org.apache.hcatalog.hbase.snapshot.RevisionManagerConfiguration; -import org.apache.hcatalog.hbase.snapshot.Transaction; -import org.apache.hcatalog.mapreduce.HCatOutputFormat; -import org.apache.hcatalog.mapreduce.HCatTableInfo; -import org.apache.hcatalog.mapreduce.InputJobInfo; -import org.apache.hcatalog.mapreduce.OutputJobInfo; -import org.apache.hcatalog.mapreduce.HCatStorageHandler; -import org.apache.thrift.TBase; -import org.apache.zookeeper.ZooKeeper; - -import com.facebook.fb303.FacebookBase; -import com.google.common.util.concurrent.ThreadFactoryBuilder; - -/** - * This class HBaseHCatStorageHandler provides functionality to create HBase - * tables through HCatalog. The implementation is very similar to the - * HiveHBaseStorageHandler, with more details to suit HCatalog. 
- */ -public class HBaseHCatStorageHandler extends HCatStorageHandler implements HiveMetaHook, Configurable { - - public final static String DEFAULT_PREFIX = "default."; - private final static String PROPERTY_INT_OUTPUT_LOCATION = "hcat.hbase.mapreduce.intermediateOutputLocation"; - - private Configuration hbaseConf; - private Configuration jobConf; - private HBaseAdmin admin; - - @Override - public void configureInputJobProperties(TableDesc tableDesc, Map jobProperties) { - // Populate jobProperties with input table name, table columns, RM snapshot, - // hbase-default.xml and hbase-site.xml - Map tableJobProperties = tableDesc.getJobProperties(); - String jobString = tableJobProperties.get(HCatConstants.HCAT_KEY_JOB_INFO); - try { - InputJobInfo inputJobInfo = (InputJobInfo) HCatUtil.deserialize(jobString); - HCatTableInfo tableInfo = inputJobInfo.getTableInfo(); - String qualifiedTableName = HBaseHCatStorageHandler.getFullyQualifiedHBaseTableName(tableInfo); - jobProperties.put(TableInputFormat.INPUT_TABLE, qualifiedTableName); - - Configuration jobConf = getJobConf(); - addResources(jobConf, jobProperties); - JobConf copyOfConf = new JobConf(jobConf); - HBaseConfiguration.addHbaseResources(copyOfConf); - //Getting hbase delegation token in getInputSplits does not work with PIG. 
So need to - //do it here - if (jobConf instanceof JobConf) { //Should be the case - HBaseUtil.addHBaseDelegationToken(copyOfConf); - ((JobConf) jobConf).getCredentials().addAll(copyOfConf.getCredentials()); - } - - String outputSchema = jobConf.get(HCatConstants.HCAT_KEY_OUTPUT_SCHEMA); - jobProperties.put(TableInputFormat.SCAN_COLUMNS, getScanColumns(tableInfo, outputSchema)); - - String serSnapshot = (String) inputJobInfo.getProperties().get( - HBaseConstants.PROPERTY_TABLE_SNAPSHOT_KEY); - if (serSnapshot == null) { - HCatTableSnapshot snapshot = - HBaseRevisionManagerUtil.createSnapshot( - RevisionManagerConfiguration.create(copyOfConf), - qualifiedTableName, tableInfo); - jobProperties.put(HBaseConstants.PROPERTY_TABLE_SNAPSHOT_KEY, - HCatUtil.serialize(snapshot)); - } - - //This adds it directly to the jobConf. Setting in jobProperties does not get propagated - //to JobConf as of now as the jobProperties is maintained per partition - //TODO: Remove when HCAT-308 is fixed - addOutputDependencyJars(jobConf); - jobProperties.put("tmpjars", jobConf.get("tmpjars")); - - } catch (IOException e) { - throw new IllegalStateException("Error while configuring job properties", e); - } - } - - @Override - public void configureOutputJobProperties(TableDesc tableDesc, Map jobProperties) { - // Populate jobProperties with output table name, hbase-default.xml, hbase-site.xml, OutputJobInfo - // Populate RM transaction in OutputJobInfo - // In case of bulk mode, populate intermediate output location - Map tableJobProperties = tableDesc.getJobProperties(); - String jobString = tableJobProperties.get(HCatConstants.HCAT_KEY_OUTPUT_INFO); - try { - OutputJobInfo outputJobInfo = (OutputJobInfo) HCatUtil.deserialize(jobString); - HCatTableInfo tableInfo = outputJobInfo.getTableInfo(); - String qualifiedTableName = HBaseHCatStorageHandler.getFullyQualifiedHBaseTableName(tableInfo); - jobProperties.put(HBaseConstants.PROPERTY_OUTPUT_TABLE_NAME_KEY, qualifiedTableName); - 
jobProperties.put(TableOutputFormat.OUTPUT_TABLE, qualifiedTableName); - - Configuration jobConf = getJobConf(); - addResources(jobConf, jobProperties); - - Configuration copyOfConf = new Configuration(jobConf); - HBaseConfiguration.addHbaseResources(copyOfConf); - - String txnString = outputJobInfo.getProperties().getProperty( - HBaseConstants.PROPERTY_WRITE_TXN_KEY); - Transaction txn = null; - if (txnString == null) { - txn = HBaseRevisionManagerUtil.beginWriteTransaction(qualifiedTableName, tableInfo, - RevisionManagerConfiguration.create(copyOfConf)); - String serializedTxn = HCatUtil.serialize(txn); - outputJobInfo.getProperties().setProperty(HBaseConstants.PROPERTY_WRITE_TXN_KEY, - serializedTxn); - } else { - txn = (Transaction) HCatUtil.deserialize(txnString); - } - if (isBulkMode(outputJobInfo)) { - String tableLocation = tableInfo.getTableLocation(); - String location = new Path(tableLocation, "REVISION_" + txn.getRevisionNumber()) - .toString(); - outputJobInfo.getProperties().setProperty(PROPERTY_INT_OUTPUT_LOCATION, location); - // We are writing out an intermediate sequenceFile hence - // location is not passed in OutputJobInfo.getLocation() - // TODO replace this with a mapreduce constant when available - jobProperties.put("mapred.output.dir", location); - jobProperties.put("mapred.output.committer.class", HBaseBulkOutputCommitter.class.getName()); - } else { - jobProperties.put("mapred.output.committer.class", HBaseDirectOutputCommitter.class.getName()); - } - - jobProperties.put(HCatConstants.HCAT_KEY_OUTPUT_INFO, HCatUtil.serialize(outputJobInfo)); - addOutputDependencyJars(jobConf); - jobProperties.put("tmpjars", jobConf.get("tmpjars")); - - } catch (IOException e) { - throw new IllegalStateException("Error while configuring job properties", e); - } - } - - /* - * @return instance of HiveAuthorizationProvider - * - * @throws HiveException - * - * @see org.apache.hcatalog.storagehandler.HCatStorageHandler# - * getAuthorizationProvider() - */ - 
@Override - public HiveAuthorizationProvider getAuthorizationProvider() - throws HiveException { - - HBaseAuthorizationProvider hbaseAuth = new HBaseAuthorizationProvider(); - hbaseAuth.init(getConf()); - return hbaseAuth; - } - - /* - * @param table - * - * @throws MetaException - * - * @see org.apache.hcatalog.storagehandler.HCatStorageHandler - * #commitCreateTable(org.apache.hadoop.hive.metastore.api.Table) - */ - @Override - public void commitCreateTable(Table table) throws MetaException { - } - - /* - * @param instance of table - * - * @param deleteData - * - * @throws MetaException - * - * @see org.apache.hcatalog.storagehandler.HCatStorageHandler - * #commitDropTable(org.apache.hadoop.hive.metastore.api.Table, boolean) - */ - @Override - public void commitDropTable(Table tbl, boolean deleteData) - throws MetaException { - checkDeleteTable(tbl); - - } - - /* - * @param instance of table - * - * @throws MetaException - * - * @see org.apache.hcatalog.storagehandler.HCatStorageHandler - * #preCreateTable(org.apache.hadoop.hive.metastore.api.Table) - */ - @Override - public void preCreateTable(Table tbl) throws MetaException { - boolean isExternal = MetaStoreUtils.isExternalTable(tbl); - - hbaseConf = getConf(); - - if (tbl.getSd().getLocation() != null) { - throw new MetaException("LOCATION may not be specified for HBase."); - } - - try { - String tableName = getFullyQualifiedHBaseTableName(tbl); - String hbaseColumnsMapping = tbl.getParameters().get( - HBaseSerDe.HBASE_COLUMNS_MAPPING); - - if (hbaseColumnsMapping == null) { - throw new MetaException( - "No hbase.columns.mapping defined in table" - + " properties."); - } - - List hbaseColumnFamilies = new ArrayList(); - List hbaseColumnQualifiers = new ArrayList(); - List hbaseColumnFamiliesBytes = new ArrayList(); - int iKey = HBaseUtil.parseColumnMapping(hbaseColumnsMapping, - hbaseColumnFamilies, hbaseColumnFamiliesBytes, - hbaseColumnQualifiers, null); - - HTableDescriptor tableDesc; - Set 
uniqueColumnFamilies = new HashSet(); - if (!getHBaseAdmin().tableExists(tableName)) { - // if it is not an external table then create one - if (!isExternal) { - // Create the column descriptors - tableDesc = new HTableDescriptor(tableName); - uniqueColumnFamilies.addAll(hbaseColumnFamilies); - uniqueColumnFamilies.remove(hbaseColumnFamilies.get(iKey)); - - for (String columnFamily : uniqueColumnFamilies) { - HColumnDescriptor familyDesc = new HColumnDescriptor(Bytes - .toBytes(columnFamily)); - familyDesc.setMaxVersions(Integer.MAX_VALUE); - tableDesc.addFamily(familyDesc); - } - - getHBaseAdmin().createTable(tableDesc); - } else { - // an external table - throw new MetaException("HBase table " + tableName - + " doesn't exist while the table is " - + "declared as an external table."); - } - - } else { - if (!isExternal) { - throw new MetaException("Table " + tableName - + " already exists within HBase." - + " Use CREATE EXTERNAL TABLE instead to" - + " register it in HCatalog."); - } - // make sure the schema mapping is right - tableDesc = getHBaseAdmin().getTableDescriptor( - Bytes.toBytes(tableName)); - - for (int i = 0; i < hbaseColumnFamilies.size(); i++) { - if (i == iKey) { - continue; - } - - if (!tableDesc.hasFamily(hbaseColumnFamiliesBytes.get(i))) { - throw new MetaException("Column Family " - + hbaseColumnFamilies.get(i) - + " is not defined in hbase table " + tableName); - } - } - } - - // ensure the table is online - new HTable(hbaseConf, tableDesc.getName()); - - //Set up table in revision manager. 
- RevisionManager rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(hbaseConf); - rm.createTable(tableName, new ArrayList(uniqueColumnFamilies)); - - } catch (MasterNotRunningException mnre) { - throw new MetaException(StringUtils.stringifyException(mnre)); - } catch (IOException ie) { - throw new MetaException(StringUtils.stringifyException(ie)); - } catch (IllegalArgumentException iae) { - throw new MetaException(StringUtils.stringifyException(iae)); - } - - } - - /* - * @param table - * - * @throws MetaException - * - * @see org.apache.hcatalog.storagehandler.HCatStorageHandler - * #preDropTable(org.apache.hadoop.hive.metastore.api.Table) - */ - @Override - public void preDropTable(Table table) throws MetaException { - } - - /* - * @param table - * - * @throws MetaException - * - * @see org.apache.hcatalog.storagehandler.HCatStorageHandler - * #rollbackCreateTable(org.apache.hadoop.hive.metastore.api.Table) - */ - @Override - public void rollbackCreateTable(Table table) throws MetaException { - checkDeleteTable(table); - } - - /* - * @param table - * - * @throws MetaException - * - * @see org.apache.hcatalog.storagehandler.HCatStorageHandler - * #rollbackDropTable(org.apache.hadoop.hive.metastore.api.Table) - */ - @Override - public void rollbackDropTable(Table table) throws MetaException { - } - - /* - * @return instance of HiveMetaHook - * - * @see org.apache.hcatalog.storagehandler.HCatStorageHandler#getMetaHook() - */ - @Override - public HiveMetaHook getMetaHook() { - return this; - } - - private HBaseAdmin getHBaseAdmin() throws MetaException { - try { - if (admin == null) { - admin = new HBaseAdmin(this.getConf()); - } - return admin; - } catch (MasterNotRunningException mnre) { - throw new MetaException(StringUtils.stringifyException(mnre)); - } catch (ZooKeeperConnectionException zkce) { - throw new MetaException(StringUtils.stringifyException(zkce)); - } - } - - private String getFullyQualifiedHBaseTableName(Table tbl) { - String tableName = 
tbl.getParameters().get(HBaseSerDe.HBASE_TABLE_NAME); - if (tableName == null) { - tableName = tbl.getSd().getSerdeInfo().getParameters() - .get(HBaseSerDe.HBASE_TABLE_NAME); - } - if (tableName == null) { - if (tbl.getDbName().equals(MetaStoreUtils.DEFAULT_DATABASE_NAME)) { - tableName = tbl.getTableName(); - } else { - tableName = tbl.getDbName() + "." + tbl.getTableName(); - } - tableName = tableName.toLowerCase(); - } - return tableName; - } - - static String getFullyQualifiedHBaseTableName(HCatTableInfo tableInfo) { - String qualifiedName = tableInfo.getStorerInfo().getProperties() - .getProperty(HBaseSerDe.HBASE_TABLE_NAME); - if (qualifiedName == null) { - String databaseName = tableInfo.getDatabaseName(); - String tableName = tableInfo.getTableName(); - if ((databaseName == null) - || (databaseName.equals(MetaStoreUtils.DEFAULT_DATABASE_NAME))) { - qualifiedName = tableName; - } else { - qualifiedName = databaseName + "." + tableName; - } - qualifiedName = qualifiedName.toLowerCase(); - } - return qualifiedName; - } - - @Override - public Class getInputFormatClass() { - return HBaseInputFormat.class; - } - - @Override - public Class getOutputFormatClass() { - return HBaseBaseOutputFormat.class; - } - - /* - * @return subclass of SerDe - * - * @throws UnsupportedOperationException - * - * @see - * org.apache.hcatalog.storagehandler.HCatStorageHandler#getSerDeClass() - */ - @Override - public Class getSerDeClass() - throws UnsupportedOperationException { - return HBaseSerDe.class; - } - - public Configuration getJobConf() { - return jobConf; - } - - @Override - public Configuration getConf() { - - if (hbaseConf == null) { - hbaseConf = HBaseConfiguration.create(); - } - return hbaseConf; - } - - @Override - public void setConf(Configuration conf) { - //setConf is called both during DDL operations and mapred read/write jobs. - //Creating a copy of conf for DDL and adding hbase-default and hbase-site.xml to it. 
- //For jobs, maintaining a reference instead of cloning as we need to - // 1) add hbase delegation token to the Credentials. - // 2) set tmpjars on it. Putting in jobProperties does not get propagated to JobConf - // in case of InputFormat as they are maintained per partition. - //Not adding hbase-default.xml and hbase-site.xml to jobConf as it will override any - //hbase properties set in the JobConf by the user. In configureInputJobProperties and - //configureOutputJobProperties, we take care of adding the default properties - //that are not already present. TODO: Change to a copy for jobs after HCAT-308 is fixed. - jobConf = conf; - hbaseConf = RevisionManagerConfiguration.create(HBaseConfiguration.create(conf)); - } - - private void checkDeleteTable(Table table) throws MetaException { - boolean isExternal = MetaStoreUtils.isExternalTable(table); - String tableName = getFullyQualifiedHBaseTableName(table); - RevisionManager rm = null; - try { - if (!isExternal && getHBaseAdmin().tableExists(tableName)) { - // we have created an HBase table, so we delete it to roll back; - if (getHBaseAdmin().isTableEnabled(tableName)) { - getHBaseAdmin().disableTable(tableName); - } - getHBaseAdmin().deleteTable(tableName); - - //Drop table in revision manager. - rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(hbaseConf); - rm.dropTable(tableName); - } - } catch (IOException ie) { - throw new MetaException(StringUtils.stringifyException(ie)); - } finally { - HBaseRevisionManagerUtil.closeRevisionManagerQuietly(rm); - } - } - - /** - * Helper method for users to add the required depedency jars to distributed cache. 
- * @param conf - * @throws IOException - */ - private void addOutputDependencyJars(Configuration conf) throws IOException { - TableMapReduceUtil.addDependencyJars(conf, - //ZK - ZooKeeper.class, - //HBase - HTable.class, - //Hive - HiveException.class, - //HCatalog jar - HCatOutputFormat.class, - //hcat hbase storage handler jar - HBaseHCatStorageHandler.class, - //hive hbase storage handler jar - HBaseSerDe.class, - //hive jar - Table.class, - //libthrift jar - TBase.class, - //hbase jar - Bytes.class, - //thrift-fb303 .jar - FacebookBase.class, - //guava jar - ThreadFactoryBuilder.class); - } - - /** - * Utility method to add hbase-default.xml and hbase-site.xml properties to a new map - * if they are not already present in the jobConf. - * @param jobConf Job configuration - * @param newJobProperties Map to which new properties should be added - */ - private void addResources(Configuration jobConf, - Map newJobProperties) { - Configuration conf = new Configuration(false); - HBaseConfiguration.addHbaseResources(conf); - RevisionManagerConfiguration.addResources(conf); - for (Entry entry : conf) { - if (jobConf.get(entry.getKey()) == null) - newJobProperties.put(entry.getKey(), entry.getValue()); - } - } - - public static boolean isBulkMode(OutputJobInfo outputJobInfo) { - //Default is false - String bulkMode = outputJobInfo.getTableInfo().getStorerInfo().getProperties() - .getProperty(HBaseConstants.PROPERTY_BULK_OUTPUT_MODE_KEY, - "false"); - return "true".equals(bulkMode); - } - - private String getScanColumns(HCatTableInfo tableInfo, String outputColSchema) throws IOException { - StringBuilder builder = new StringBuilder(); - String hbaseColumnMapping = tableInfo.getStorerInfo().getProperties() - .getProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING); - if (outputColSchema == null) { - String[] splits = hbaseColumnMapping.split("[,]"); - for (int i = 0; i < splits.length; i++) { - if (!splits[i].equals(HBaseSerDe.HBASE_KEY_COL)) - builder.append(splits[i]).append(" 
"); - } - } else { - HCatSchema outputSchema = (HCatSchema) HCatUtil.deserialize(outputColSchema); - HCatSchema tableSchema = tableInfo.getDataColumns(); - List outputFieldNames = outputSchema.getFieldNames(); - List outputColumnMapping = new ArrayList(); - for (String fieldName : outputFieldNames) { - int position = tableSchema.getPosition(fieldName); - outputColumnMapping.add(position); - } - List columnFamilies = new ArrayList(); - List columnQualifiers = new ArrayList(); - HBaseUtil.parseColumnMapping(hbaseColumnMapping, columnFamilies, null, - columnQualifiers, null); - for (int i = 0; i < outputColumnMapping.size(); i++) { - int cfIndex = outputColumnMapping.get(i); - String cf = columnFamilies.get(cfIndex); - // We skip the key column. - if (cf.equals(HBaseSerDe.HBASE_KEY_COL) == false) { - String qualifier = columnQualifiers.get(i); - builder.append(cf); - builder.append(":"); - if (qualifier != null) { - builder.append(qualifier); - } - builder.append(" "); - } - } - } - //Remove the extra space delimiter - builder.deleteCharAt(builder.length() - 1); - return builder.toString(); - } - -} diff --git hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseInputFormat.java hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseInputFormat.java deleted file mode 100644 index bb81f5b..0000000 --- hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseInputFormat.java +++ /dev/null @@ -1,126 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.hbase; - -import java.io.IOException; -import java.util.List; - -import org.apache.hadoop.hbase.client.HTable; -import org.apache.hadoop.hbase.client.Result; -import org.apache.hadoop.hbase.client.Scan; -import org.apache.hadoop.hbase.io.ImmutableBytesWritable; -import org.apache.hadoop.hbase.mapred.TableSplit; -import org.apache.hadoop.hbase.mapreduce.TableInputFormat; -import org.apache.hcatalog.mapreduce.HCatMapRedUtil; -import org.apache.hadoop.mapred.InputFormat; -import org.apache.hadoop.mapred.InputSplit; -import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapred.RecordReader; -import org.apache.hadoop.mapred.Reporter; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.common.HCatUtil; -import org.apache.hcatalog.mapreduce.InputJobInfo; - -/** - * This class HBaseInputFormat is a wrapper class of TableInputFormat in HBase. 
- */ -class HBaseInputFormat implements InputFormat { - - private final TableInputFormat inputFormat; - - public HBaseInputFormat() { - inputFormat = new TableInputFormat(); - } - - /* - * @param instance of InputSplit - * - * @param instance of TaskAttemptContext - * - * @return RecordReader - * - * @throws IOException - * - * @throws InterruptedException - * - * @see - * org.apache.hadoop.mapreduce.InputFormat#createRecordReader(org.apache - * .hadoop.mapreduce.InputSplit, - * org.apache.hadoop.mapreduce.TaskAttemptContext) - */ - @Override - public RecordReader getRecordReader( - InputSplit split, JobConf job, Reporter reporter) - throws IOException { - String jobString = job.get(HCatConstants.HCAT_KEY_JOB_INFO); - InputJobInfo inputJobInfo = (InputJobInfo) HCatUtil.deserialize(jobString); - - String tableName = job.get(TableInputFormat.INPUT_TABLE); - TableSplit tSplit = (TableSplit) split; - HbaseSnapshotRecordReader recordReader = new HbaseSnapshotRecordReader(inputJobInfo, job); - inputFormat.setConf(job); - Scan inputScan = inputFormat.getScan(); - // TODO: Make the caching configurable by the user - inputScan.setCaching(200); - inputScan.setCacheBlocks(false); - Scan sc = new Scan(inputScan); - sc.setStartRow(tSplit.getStartRow()); - sc.setStopRow(tSplit.getEndRow()); - recordReader.setScan(sc); - recordReader.setHTable(new HTable(job, tableName)); - recordReader.init(); - return recordReader; - } - - /* - * @param jobContext - * - * @return List of InputSplit - * - * @throws IOException - * - * @throws InterruptedException - * - * @see - * org.apache.hadoop.mapreduce.InputFormat#getSplits(org.apache.hadoop.mapreduce - * .JobContext) - */ - @Override - public org.apache.hadoop.mapred.InputSplit[] getSplits(JobConf job, int numSplits) - throws IOException { - inputFormat.setConf(job); - return convertSplits(inputFormat.getSplits(HCatMapRedUtil.createJobContext(job, null, - Reporter.NULL))); - } - - private InputSplit[] convertSplits(List splits) { - 
InputSplit[] converted = new InputSplit[splits.size()]; - for (int i = 0; i < splits.size(); i++) { - org.apache.hadoop.hbase.mapreduce.TableSplit tableSplit = - (org.apache.hadoop.hbase.mapreduce.TableSplit) splits.get(i); - TableSplit newTableSplit = new TableSplit(tableSplit.getTableName(), - tableSplit.getStartRow(), - tableSplit.getEndRow(), tableSplit.getRegionLocation()); - converted[i] = newTableSplit; - } - return converted; - } - -} diff --git hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseRevisionManagerUtil.java hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseRevisionManagerUtil.java deleted file mode 100644 index c2dafda..0000000 --- hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseRevisionManagerUtil.java +++ /dev/null @@ -1,257 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.hbase; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.hbase.HBaseSerDe; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.common.HCatUtil; -import org.apache.hcatalog.data.schema.HCatFieldSchema; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.apache.hcatalog.hbase.snapshot.RevisionManager; -import org.apache.hcatalog.hbase.snapshot.RevisionManagerFactory; -import org.apache.hcatalog.hbase.snapshot.TableSnapshot; -import org.apache.hcatalog.hbase.snapshot.Transaction; -import org.apache.hcatalog.mapreduce.HCatTableInfo; -import org.apache.hcatalog.mapreduce.InputJobInfo; -import org.apache.hcatalog.mapreduce.OutputJobInfo; -import org.apache.hcatalog.mapreduce.StorerInfo; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - - -/** - * The Class HBaseRevisionManagerUtil has utility methods to interact with Revision Manager - * - */ -class HBaseRevisionManagerUtil { - - private final static Logger LOG = LoggerFactory.getLogger(HBaseRevisionManagerUtil.class); - - private HBaseRevisionManagerUtil() { - } - - /** - * Creates the latest snapshot of the table. - * - * @param jobConf The job configuration. - * @param hbaseTableName The fully qualified name of the HBase table. - * @param tableInfo HCat table information - * @return An instance of HCatTableSnapshot - * @throws IOException Signals that an I/O exception has occurred. 
- */ - static HCatTableSnapshot createSnapshot(Configuration jobConf, - String hbaseTableName, HCatTableInfo tableInfo) throws IOException { - - RevisionManager rm = null; - TableSnapshot snpt; - try { - rm = getOpenedRevisionManager(jobConf); - snpt = rm.createSnapshot(hbaseTableName); - } finally { - closeRevisionManagerQuietly(rm); - } - - HCatTableSnapshot hcatSnapshot = HBaseRevisionManagerUtil.convertSnapshot(snpt, tableInfo); - return hcatSnapshot; - } - - /** - * Creates the snapshot using the revision specified by the user. - * - * @param jobConf The job configuration. - * @param tableName The fully qualified name of the table whose snapshot is being taken. - * @param revision The revision number to use for the snapshot. - * @return An instance of HCatTableSnapshot. - * @throws IOException Signals that an I/O exception has occurred. - */ - static HCatTableSnapshot createSnapshot(Configuration jobConf, - String tableName, long revision) - throws IOException { - - TableSnapshot snpt; - RevisionManager rm = null; - try { - rm = getOpenedRevisionManager(jobConf); - snpt = rm.createSnapshot(tableName, revision); - } finally { - closeRevisionManagerQuietly(rm); - } - - String inputJobString = jobConf.get(HCatConstants.HCAT_KEY_JOB_INFO); - if (inputJobString == null) { - throw new IOException( - "InputJobInfo information not found in JobContext. " - + "HCatInputFormat.setInput() not called?"); - } - InputJobInfo inputInfo = (InputJobInfo) HCatUtil.deserialize(inputJobString); - HCatTableSnapshot hcatSnapshot = HBaseRevisionManagerUtil - .convertSnapshot(snpt, inputInfo.getTableInfo()); - - return hcatSnapshot; - } - - /** - * Gets an instance of revision manager which is opened. - * - * @param jobConf The job configuration. - * @return RevisionManager An instance of revision manager. 
- * @throws IOException - */ - static RevisionManager getOpenedRevisionManager(Configuration jobConf) throws IOException { - return RevisionManagerFactory.getOpenedRevisionManager(jobConf); - } - - static void closeRevisionManagerQuietly(RevisionManager rm) { - if (rm != null) { - try { - rm.close(); - } catch (IOException e) { - LOG.warn("Error while trying to close revision manager", e); - } - } - } - - - static HCatTableSnapshot convertSnapshot(TableSnapshot hbaseSnapshot, - HCatTableInfo hcatTableInfo) throws IOException { - - HCatSchema hcatTableSchema = hcatTableInfo.getDataColumns(); - Map hcatHbaseColMap = getHCatHBaseColumnMapping(hcatTableInfo); - HashMap revisionMap = new HashMap(); - - for (HCatFieldSchema fSchema : hcatTableSchema.getFields()) { - if (hcatHbaseColMap.containsKey(fSchema.getName())) { - String colFamily = hcatHbaseColMap.get(fSchema.getName()); - long revisionID = hbaseSnapshot.getRevision(colFamily); - revisionMap.put(fSchema.getName(), revisionID); - } - } - - HCatTableSnapshot hcatSnapshot = new HCatTableSnapshot( - hcatTableInfo.getDatabaseName(), hcatTableInfo.getTableName(), revisionMap, hbaseSnapshot.getLatestRevision()); - return hcatSnapshot; - } - - static TableSnapshot convertSnapshot(HCatTableSnapshot hcatSnapshot, - HCatTableInfo hcatTableInfo) throws IOException { - - HCatSchema hcatTableSchema = hcatTableInfo.getDataColumns(); - Map revisionMap = new HashMap(); - Map hcatHbaseColMap = getHCatHBaseColumnMapping(hcatTableInfo); - for (HCatFieldSchema fSchema : hcatTableSchema.getFields()) { - String colFamily = hcatHbaseColMap.get(fSchema.getName()); - if (hcatSnapshot.containsColumn(fSchema.getName())) { - long revision = hcatSnapshot.getRevision(fSchema.getName()); - revisionMap.put(colFamily, revision); - } - } - - String fullyQualifiedName = hcatSnapshot.getDatabaseName() + "." 
- + hcatSnapshot.getTableName(); - return new TableSnapshot(fullyQualifiedName, revisionMap, hcatSnapshot.getLatestRevision()); - - } - - /** - * Begins a transaction in the revision manager for the given table. - * @param qualifiedTableName Name of the table - * @param tableInfo HCat Table information - * @param jobConf Job Configuration - * @return The new transaction in revision manager - * @throws IOException - */ - static Transaction beginWriteTransaction(String qualifiedTableName, - HCatTableInfo tableInfo, Configuration jobConf) throws IOException { - Transaction txn; - RevisionManager rm = null; - try { - rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(jobConf); - String hBaseColumns = tableInfo.getStorerInfo().getProperties() - .getProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING); - String[] splits = hBaseColumns.split("[,:]"); - Set families = new HashSet(); - for (int i = 0; i < splits.length; i += 2) { - if (!splits[i].isEmpty()) - families.add(splits[i]); - } - txn = rm.beginWriteTransaction(qualifiedTableName, new ArrayList(families)); - } finally { - HBaseRevisionManagerUtil.closeRevisionManagerQuietly(rm); - } - return txn; - } - - static Transaction getWriteTransaction(Configuration conf) throws IOException { - OutputJobInfo outputJobInfo = (OutputJobInfo) HCatUtil.deserialize(conf.get(HCatConstants.HCAT_KEY_OUTPUT_INFO)); - return (Transaction) HCatUtil.deserialize(outputJobInfo.getProperties() - .getProperty(HBaseConstants.PROPERTY_WRITE_TXN_KEY)); - } - - static void setWriteTransaction(Configuration conf, Transaction txn) throws IOException { - OutputJobInfo outputJobInfo = (OutputJobInfo) HCatUtil.deserialize(conf.get(HCatConstants.HCAT_KEY_OUTPUT_INFO)); - outputJobInfo.getProperties().setProperty(HBaseConstants.PROPERTY_WRITE_TXN_KEY, HCatUtil.serialize(txn)); - conf.set(HCatConstants.HCAT_KEY_OUTPUT_INFO, HCatUtil.serialize(outputJobInfo)); - } - - /** - * Get the Revision number that will be assigned to this job's output data - * @param 
conf configuration of the job - * @return the revision number used - * @throws IOException - */ - static long getOutputRevision(Configuration conf) throws IOException { - return getWriteTransaction(conf).getRevisionNumber(); - } - - private static Map getHCatHBaseColumnMapping(HCatTableInfo hcatTableInfo) - throws IOException { - - HCatSchema hcatTableSchema = hcatTableInfo.getDataColumns(); - StorerInfo storeInfo = hcatTableInfo.getStorerInfo(); - String hbaseColumnMapping = storeInfo.getProperties().getProperty( - HBaseSerDe.HBASE_COLUMNS_MAPPING); - - Map hcatHbaseColMap = new HashMap(); - List columnFamilies = new ArrayList(); - List columnQualifiers = new ArrayList(); - HBaseUtil.parseColumnMapping(hbaseColumnMapping, columnFamilies, - null, columnQualifiers, null); - - for (HCatFieldSchema column : hcatTableSchema.getFields()) { - int fieldPos = hcatTableSchema.getPosition(column.getName()); - String colFamily = columnFamilies.get(fieldPos); - if (colFamily.equals(HBaseSerDe.HBASE_KEY_COL) == false) { - hcatHbaseColMap.put(column.getName(), colFamily); - } - } - - return hcatHbaseColMap; - } - -} diff --git hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseUtil.java hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseUtil.java deleted file mode 100644 index 27e165f..0000000 --- hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HBaseUtil.java +++ /dev/null @@ -1,159 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.hbase; - -import java.io.IOException; -import java.util.List; - -import org.apache.hadoop.hbase.security.User; -import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hadoop.hive.hbase.HBaseSerDe; -import org.apache.hadoop.mapred.JobConf; - -class HBaseUtil { - - private HBaseUtil() { - } - - /** - * Parses the HBase columns mapping to identify the column families, qualifiers - * and also caches the byte arrays corresponding to them. One of the HCat table - * columns maps to the HBase row key, by default the first column. - * - * @param columnMapping - the column mapping specification to be parsed - * @param colFamilies - the list of HBase column family names - * @param colFamiliesBytes - the corresponding byte array - * @param colQualifiers - the list of HBase column qualifier names - * @param colQualifiersBytes - the corresponding byte array - * @return the row key index in the column names list - * @throws IOException - */ - static int parseColumnMapping( - String columnMapping, - List colFamilies, - List colFamiliesBytes, - List colQualifiers, - List colQualifiersBytes) throws IOException { - - int rowKeyIndex = -1; - - if (colFamilies == null || colQualifiers == null) { - throw new IllegalArgumentException("Error: caller must pass in lists for the column families " + - "and qualifiers."); - } - - colFamilies.clear(); - colQualifiers.clear(); - - if (columnMapping == null) { - throw new IllegalArgumentException("Error: hbase.columns.mapping missing for this HBase table."); - } - - if 
(columnMapping.equals("") || columnMapping.equals(HBaseSerDe.HBASE_KEY_COL)) { - throw new IllegalArgumentException("Error: hbase.columns.mapping specifies only the HBase table" - + " row key. A valid Hive-HBase table must specify at least one additional column."); - } - - String[] mapping = columnMapping.split(","); - - for (int i = 0; i < mapping.length; i++) { - String elem = mapping[i]; - int idxFirst = elem.indexOf(":"); - int idxLast = elem.lastIndexOf(":"); - - if (idxFirst < 0 || !(idxFirst == idxLast)) { - throw new IllegalArgumentException("Error: the HBase columns mapping contains a badly formed " + - "column family, column qualifier specification."); - } - - if (elem.equals(HBaseSerDe.HBASE_KEY_COL)) { - rowKeyIndex = i; - colFamilies.add(elem); - colQualifiers.add(null); - } else { - String[] parts = elem.split(":"); - assert (parts.length > 0 && parts.length <= 2); - colFamilies.add(parts[0]); - - if (parts.length == 2) { - colQualifiers.add(parts[1]); - } else { - colQualifiers.add(null); - } - } - } - - if (rowKeyIndex == -1) { - colFamilies.add(0, HBaseSerDe.HBASE_KEY_COL); - colQualifiers.add(0, null); - rowKeyIndex = 0; - } - - if (colFamilies.size() != colQualifiers.size()) { - throw new IOException("Error in parsing the hbase columns mapping."); - } - - // populate the corresponding byte [] if the client has passed in a non-null list - if (colFamiliesBytes != null) { - colFamiliesBytes.clear(); - - for (String fam : colFamilies) { - colFamiliesBytes.add(Bytes.toBytes(fam)); - } - } - - if (colQualifiersBytes != null) { - colQualifiersBytes.clear(); - - for (String qual : colQualifiers) { - if (qual == null) { - colQualifiersBytes.add(null); - } else { - colQualifiersBytes.add(Bytes.toBytes(qual)); - } - } - } - - if (colFamiliesBytes != null && colQualifiersBytes != null) { - if (colFamiliesBytes.size() != colQualifiersBytes.size()) { - throw new IOException("Error in caching the bytes for the hbase column families " + - "and qualifiers."); - } 
- } - - return rowKeyIndex; - } - - /** - * Get delegation token from hbase and add it to JobConf - * @param job - * @throws IOException - */ - static void addHBaseDelegationToken(JobConf job) throws IOException { - if (User.isHBaseSecurityEnabled(job)) { - try { - User.getCurrent().obtainAuthTokenForJob(job); - } catch (InterruptedException e) { - throw new IOException("Error while obtaining hbase delegation token", e); - } - } - } - -} diff --git hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HCatTableSnapshot.java hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HCatTableSnapshot.java deleted file mode 100644 index 61a439e..0000000 --- hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HCatTableSnapshot.java +++ /dev/null @@ -1,92 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.hbase; - -import java.io.Serializable; -import java.util.Map; - - -/** - * The class HCatTableSnapshot represents a snapshot of a hcatalog table. - * This class is intended to be opaque. This class would used only by the - * record readers to obtain knowledge about the revisions of a - * column to be filtered. 
- */ -public class HCatTableSnapshot implements Serializable { - - private static final long serialVersionUID = 1L; - private String tableName; - private String databaseName; - private Map columnMap; - private long latestRevision; - - HCatTableSnapshot(String databaseName, String tableName, Map columnMap, long latestRevision) { - this.tableName = tableName; - this.databaseName = databaseName; - this.columnMap = columnMap; - this.latestRevision = latestRevision; - } - - /** - * @return The name of the table in the snapshot. - */ - public String getTableName() { - return this.tableName; - } - - /** - * @return The name of the database to which the table snapshot belongs. - */ - public String getDatabaseName() { - return this.databaseName; - } - - /** - * @return The revision number of a column in a snapshot. - */ - long getRevision(String column) { - if (columnMap.containsKey(column)) - return this.columnMap.get(column); - return latestRevision; - } - - /** - * The method checks if the snapshot contains information about a data column. 
- * - * @param column The data column of the table - * @return true, if successful - */ - boolean containsColumn(String column) { - return this.columnMap.containsKey(column); - } - - /** - * @return latest committed revision when snapshot was taken - */ - long getLatestRevision() { - return latestRevision; - } - - @Override - public String toString() { - String snapshot = " Database Name: " + this.databaseName + " Table Name : " + tableName + - "Latest Revision: " + latestRevision + " Column revision : " + columnMap.toString(); - return snapshot; - } -} diff --git hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HbaseSnapshotRecordReader.java hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HbaseSnapshotRecordReader.java deleted file mode 100644 index 6f18846..0000000 --- hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/HbaseSnapshotRecordReader.java +++ /dev/null @@ -1,255 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.hcatalog.hbase; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Set; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hbase.KeyValue; -import org.apache.hadoop.hbase.client.HTable; -import org.apache.hadoop.hbase.client.Result; -import org.apache.hadoop.hbase.client.ResultScanner; -import org.apache.hadoop.hbase.client.Scan; -import org.apache.hadoop.hbase.io.ImmutableBytesWritable; -import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hadoop.io.DataInputBuffer; -import org.apache.hadoop.io.DataOutputBuffer; -import org.apache.hadoop.mapred.RecordReader; -import org.apache.hcatalog.common.HCatUtil; -import org.apache.hcatalog.hbase.snapshot.FamilyRevision; -import org.apache.hcatalog.hbase.snapshot.RevisionManager; -import org.apache.hcatalog.hbase.snapshot.TableSnapshot; -import org.apache.hcatalog.mapreduce.InputJobInfo; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * The Class HbaseSnapshotRecordReader implements logic for filtering records - * based on snapshot. 
- */ -class HbaseSnapshotRecordReader implements RecordReader { - - static final Logger LOG = LoggerFactory.getLogger(HbaseSnapshotRecordReader.class); - private final InputJobInfo inpJobInfo; - private final Configuration conf; - private final int maxRevisions = 1; - private ResultScanner scanner; - private Scan scan; - private HTable htable; - private TableSnapshot snapshot; - private Iterator resultItr; - private Set allAbortedTransactions; - private DataOutputBuffer valueOut = new DataOutputBuffer(); - private DataInputBuffer valueIn = new DataInputBuffer(); - - HbaseSnapshotRecordReader(InputJobInfo inputJobInfo, Configuration conf) throws IOException { - this.inpJobInfo = inputJobInfo; - this.conf = conf; - String snapshotString = conf.get(HBaseConstants.PROPERTY_TABLE_SNAPSHOT_KEY); - HCatTableSnapshot hcatSnapshot = (HCatTableSnapshot) HCatUtil - .deserialize(snapshotString); - this.snapshot = HBaseRevisionManagerUtil.convertSnapshot(hcatSnapshot, - inpJobInfo.getTableInfo()); - } - - public void init() throws IOException { - restart(scan.getStartRow()); - } - - public void restart(byte[] firstRow) throws IOException { - allAbortedTransactions = getAbortedTransactions(Bytes.toString(htable.getTableName()), scan); - long maxValidRevision = getMaximumRevision(scan, snapshot); - while (allAbortedTransactions.contains(maxValidRevision)) { - maxValidRevision--; - } - Scan newScan = new Scan(scan); - newScan.setStartRow(firstRow); - //TODO: See if filters in 0.92 can be used to optimize the scan - //TODO: Consider create a custom snapshot filter - //TODO: Make min revision a constant in RM - newScan.setTimeRange(0, maxValidRevision + 1); - newScan.setMaxVersions(); - this.scanner = this.htable.getScanner(newScan); - resultItr = this.scanner.iterator(); - } - - private Set getAbortedTransactions(String tableName, Scan scan) throws IOException { - Set abortedTransactions = new HashSet(); - RevisionManager rm = null; - try { - rm = 
HBaseRevisionManagerUtil.getOpenedRevisionManager(conf); - byte[][] families = scan.getFamilies(); - for (byte[] familyKey : families) { - String family = Bytes.toString(familyKey); - List abortedWriteTransactions = rm.getAbortedWriteTransactions( - tableName, family); - if (abortedWriteTransactions != null) { - for (FamilyRevision revision : abortedWriteTransactions) { - abortedTransactions.add(revision.getRevision()); - } - } - } - return abortedTransactions; - } finally { - HBaseRevisionManagerUtil.closeRevisionManagerQuietly(rm); - } - } - - private long getMaximumRevision(Scan scan, TableSnapshot snapshot) { - long maxRevision = 0; - byte[][] families = scan.getFamilies(); - for (byte[] familyKey : families) { - String family = Bytes.toString(familyKey); - long revision = snapshot.getRevision(family); - if (revision > maxRevision) - maxRevision = revision; - } - return maxRevision; - } - - /* - * @param htable The HTable ( of HBase) to use for the record reader. - * - */ - public void setHTable(HTable htable) { - this.htable = htable; - } - - /* - * @param scan The scan to be used for reading records. - * - */ - public void setScan(Scan scan) { - this.scan = scan; - } - - @Override - public ImmutableBytesWritable createKey() { - return new ImmutableBytesWritable(); - } - - @Override - public Result createValue() { - return new Result(); - } - - @Override - public long getPos() { - // This should be the ordinal tuple in the range; - // not clear how to calculate... - return 0; - } - - @Override - public float getProgress() throws IOException { - // Depends on the total number of tuples - return 0; - } - - @Override - public boolean next(ImmutableBytesWritable key, Result value) throws IOException { - if (this.resultItr == null) { - LOG.warn("The HBase result iterator is found null. 
It is possible" - + " that the record reader has already been closed."); - } else { - while (resultItr.hasNext()) { - Result temp = resultItr.next(); - Result hbaseRow = prepareResult(temp.list()); - if (hbaseRow != null) { - // Update key and value. Currently no way to avoid serialization/de-serialization - // as no setters are available. - key.set(hbaseRow.getRow()); - valueOut.reset(); - hbaseRow.write(valueOut); - valueIn.reset(valueOut.getData(), valueOut.getLength()); - value.readFields(valueIn); - return true; - } - - } - } - return false; - } - - private Result prepareResult(List keyvalues) { - - List finalKeyVals = new ArrayList(); - Map> qualValMap = new HashMap>(); - for (KeyValue kv : keyvalues) { - byte[] cf = kv.getFamily(); - byte[] qualifier = kv.getQualifier(); - String key = Bytes.toString(cf) + ":" + Bytes.toString(qualifier); - List kvs; - if (qualValMap.containsKey(key)) { - kvs = qualValMap.get(key); - } else { - kvs = new ArrayList(); - } - - String family = Bytes.toString(kv.getFamily()); - //Ignore aborted transactions - if (allAbortedTransactions.contains(kv.getTimestamp())) { - continue; - } - - long desiredTS = snapshot.getRevision(family); - if (kv.getTimestamp() <= desiredTS) { - kvs.add(kv); - } - qualValMap.put(key, kvs); - } - - Set keys = qualValMap.keySet(); - for (String cf : keys) { - List kvs = qualValMap.get(cf); - if (maxRevisions <= kvs.size()) { - for (int i = 0; i < maxRevisions; i++) { - finalKeyVals.add(kvs.get(i)); - } - } else { - finalKeyVals.addAll(kvs); - } - } - - if (finalKeyVals.size() == 0) { - return null; - } else { - KeyValue[] kvArray = new KeyValue[finalKeyVals.size()]; - finalKeyVals.toArray(kvArray); - return new Result(kvArray); - } - } - - /* - * @see org.apache.hadoop.hbase.mapred.TableRecordReader#close() - */ - @Override - public void close() { - this.resultItr = null; - this.scanner.close(); - } - -} diff --git 
hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/ImportSequenceFile.java hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/ImportSequenceFile.java deleted file mode 100644 index 7b8f037..0000000 --- hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/ImportSequenceFile.java +++ /dev/null @@ -1,252 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.hbase; - -import org.apache.hadoop.filecache.DistributedCache; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.permission.FsPermission; -import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat; -import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles; -import org.apache.hadoop.hbase.mapreduce.PutSortReducer; -import org.apache.hadoop.hbase.mapreduce.hadoopbackport.TotalOrderPartitioner; - -import java.io.IOException; -import java.net.URI; -import java.util.Map; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hbase.client.HTable; -import org.apache.hadoop.hbase.client.Put; -import org.apache.hadoop.hbase.io.ImmutableBytesWritable; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.JobContext; -import org.apache.hadoop.mapreduce.JobStatus; -import org.apache.hadoop.mapreduce.Mapper; -import org.apache.hadoop.mapreduce.OutputCommitter; -import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.hadoop.mapreduce.TaskAttemptID; -import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; -import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat; -import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; -import org.apache.hcatalog.mapreduce.HCatMapRedUtil; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import static org.apache.hadoop.hbase.mapreduce.hadoopbackport.TotalOrderPartitioner.DEFAULT_PATH; - - -/** - * MapReduce job which reads a series of Puts stored in a sequence file - * and imports the data into HBase. It needs to create the necessary HBase - * regions using HFileOutputFormat and then notify the correct region servers - * to doBulkLoad(). This will be used After an MR job has written the SequenceFile - * and data needs to be bulk loaded onto HBase. 
- */ -class ImportSequenceFile { - private final static Logger LOG = LoggerFactory.getLogger(ImportSequenceFile.class); - private final static String NAME = "HCatImportSequenceFile"; - private final static String IMPORTER_WORK_DIR = "_IMPORTER_MR_WORK_DIR"; - - - private static class SequenceFileImporter extends Mapper { - - @Override - public void map(ImmutableBytesWritable rowKey, Put value, - Context context) - throws IOException { - try { - context.write(new ImmutableBytesWritable(value.getRow()), value); - } catch (InterruptedException e) { - e.printStackTrace(); - } - } - } - - private static class ImporterOutputFormat extends HFileOutputFormat { - @Override - public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException { - final OutputCommitter baseOutputCommitter = super.getOutputCommitter(context); - - return new OutputCommitter() { - @Override - public void setupJob(JobContext jobContext) throws IOException { - baseOutputCommitter.setupJob(jobContext); - } - - @Override - public void setupTask(TaskAttemptContext taskContext) throws IOException { - baseOutputCommitter.setupTask(taskContext); - } - - @Override - public boolean needsTaskCommit(TaskAttemptContext taskContext) throws IOException { - return baseOutputCommitter.needsTaskCommit(taskContext); - } - - @Override - public void commitTask(TaskAttemptContext taskContext) throws IOException { - baseOutputCommitter.commitTask(taskContext); - } - - @Override - public void abortTask(TaskAttemptContext taskContext) throws IOException { - baseOutputCommitter.abortTask(taskContext); - } - - @Override - public void abortJob(JobContext jobContext, JobStatus.State state) throws IOException { - try { - baseOutputCommitter.abortJob(jobContext, state); - } finally { - cleanupScratch(jobContext); - } - } - - @Override - public void commitJob(JobContext jobContext) throws IOException { - try { - baseOutputCommitter.commitJob(jobContext); - Configuration conf = 
jobContext.getConfiguration(); - try { - //import hfiles - new LoadIncrementalHFiles(conf) - .doBulkLoad(HFileOutputFormat.getOutputPath(jobContext), - new HTable(conf, - conf.get(HBaseConstants.PROPERTY_OUTPUT_TABLE_NAME_KEY))); - } catch (Exception e) { - throw new IOException("BulkLoad failed.", e); - } - } finally { - cleanupScratch(jobContext); - } - } - - @Override - public void cleanupJob(JobContext context) throws IOException { - try { - baseOutputCommitter.cleanupJob(context); - } finally { - cleanupScratch(context); - } - } - - private void cleanupScratch(JobContext context) throws IOException { - FileSystem fs = FileSystem.get(context.getConfiguration()); - fs.delete(HFileOutputFormat.getOutputPath(context), true); - } - }; - } - } - - private static Job createSubmittableJob(Configuration conf, String tableName, Path inputDir, Path scratchDir, boolean localMode) - throws IOException { - Job job = new Job(conf, NAME + "_" + tableName); - job.setJarByClass(SequenceFileImporter.class); - FileInputFormat.setInputPaths(job, inputDir); - job.setInputFormatClass(SequenceFileInputFormat.class); - job.setMapperClass(SequenceFileImporter.class); - - HTable table = new HTable(conf, tableName); - job.setReducerClass(PutSortReducer.class); - FileOutputFormat.setOutputPath(job, scratchDir); - job.setMapOutputKeyClass(ImmutableBytesWritable.class); - job.setMapOutputValueClass(Put.class); - HFileOutputFormat.configureIncrementalLoad(job, table); - //override OutputFormatClass with our own so we can include cleanup in the committer - job.setOutputFormatClass(ImporterOutputFormat.class); - - //local mode doesn't support symbolic links so we have to manually set the actual path - if (localMode) { - String partitionFile = null; - for (URI uri : DistributedCache.getCacheFiles(job.getConfiguration())) { - if (DEFAULT_PATH.equals(uri.getFragment())) { - partitionFile = uri.toString(); - break; - } - } - partitionFile = partitionFile.substring(0, 
partitionFile.lastIndexOf("#")); - job.getConfiguration().set(TotalOrderPartitioner.PARTITIONER_PATH, partitionFile.toString()); - } - - return job; - } - - /** - * Method to run the Importer MapReduce Job. Normally will be called by another MR job - * during OutputCommitter.commitJob(). - * @param parentContext JobContext of the parent job - * @param tableName name of table to bulk load data into - * @param InputDir path of SequenceFile formatted data to read - * @param scratchDir temporary path for the Importer MR job to build the HFiles which will be imported - * @return - */ - static boolean runJob(JobContext parentContext, String tableName, Path InputDir, Path scratchDir) { - Configuration parentConf = parentContext.getConfiguration(); - Configuration conf = new Configuration(); - for (Map.Entry el : parentConf) { - if (el.getKey().startsWith("hbase.")) - conf.set(el.getKey(), el.getValue()); - if (el.getKey().startsWith("mapred.cache.archives")) - conf.set(el.getKey(), el.getValue()); - } - - //Inherit jar dependencies added to distributed cache loaded by parent job - conf.set("mapred.job.classpath.archives", parentConf.get("mapred.job.classpath.archives", "")); - conf.set("mapreduce.job.cache.archives.visibilities", parentConf.get("mapreduce.job.cache.archives.visibilities", "")); - - //Temporary fix until hbase security is ready - //We need the written HFile to be world readable so - //hbase regionserver user has the privileges to perform a hdfs move - if (parentConf.getBoolean("hadoop.security.authorization", false)) { - FsPermission.setUMask(conf, FsPermission.valueOf("----------")); - } - - conf.set(HBaseConstants.PROPERTY_OUTPUT_TABLE_NAME_KEY, tableName); - conf.setBoolean(JobContext.JOB_CANCEL_DELEGATION_TOKEN, false); - - boolean localMode = "local".equals(conf.get("mapred.job.tracker")); - - boolean success = false; - try { - FileSystem fs = FileSystem.get(parentConf); - Path workDir = new Path(new Job(parentConf).getWorkingDirectory(), 
IMPORTER_WORK_DIR); - if (!fs.mkdirs(workDir)) - throw new IOException("Importer work directory already exists: " + workDir); - Job job = createSubmittableJob(conf, tableName, InputDir, scratchDir, localMode); - job.setWorkingDirectory(workDir); - job.getCredentials().addAll(parentContext.getCredentials()); - success = job.waitForCompletion(true); - fs.delete(workDir, true); - //We only cleanup on success because failure might've been caused by existence of target directory - if (localMode && success) { - new ImporterOutputFormat().getOutputCommitter(HCatMapRedUtil.createTaskAttemptContext(conf, new TaskAttemptID())).commitJob(job); - } - } catch (InterruptedException e) { - LOG.error("ImportSequenceFile Failed", e); - } catch (ClassNotFoundException e) { - LOG.error("ImportSequenceFile Failed", e); - } catch (IOException e) { - LOG.error("ImportSequenceFile Failed", e); - } - return success; - } - -} diff --git hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/ResultConverter.java hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/ResultConverter.java deleted file mode 100644 index 7f2418a..0000000 --- hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/ResultConverter.java +++ /dev/null @@ -1,58 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.hbase; - -import org.apache.hadoop.hbase.client.Put; -import org.apache.hadoop.hbase.client.Result; -import org.apache.hcatalog.data.HCatRecord; - -import java.io.IOException; - -/** - * Interface used to define conversion of HCatRecord to and from Native HBase write (Put) and read (Result) objects. - * How the actual mapping is defined between an HBase Table's schema and an HCatalog Table's schema - * is up to the underlying implementation - */ -interface ResultConverter { - - /** - * convert HCatRecord instance to an HBase Put, used when writing out data. - * @param record instance to convert - * @return converted Put instance - * @throws IOException - */ - Put convert(HCatRecord record) throws IOException; - - /** - * convert HBase Result to HCatRecord instance, used when reading data. - * @param result instance to convert - * @return converted Result instance - * @throws IOException - */ - HCatRecord convert(Result result) throws IOException; - - /** - * Returns the hbase columns that are required for the scan. - * @return String containing hbase columns delimited by space. - * @throws IOException - */ - String getHBaseScanColumns() throws IOException; - -} diff --git hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/FamilyRevision.java hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/FamilyRevision.java deleted file mode 100644 index b5103fc..0000000 --- hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/FamilyRevision.java +++ /dev/null @@ -1,71 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.hbase.snapshot; - - -/** - * A FamiliyRevision class consists of a revision number and a expiration - * timestamp. When a write transaction starts, the transaction - * object is appended to the transaction list of the each column - * family and stored in the corresponding znode. When a write transaction is - * committed, the transaction object is removed from the list. - */ -public class FamilyRevision implements - Comparable { - - private long revision; - - private long timestamp; - - /** - * Create a FamilyRevision object - * @param rev revision number - * @param ts expiration timestamp - */ - FamilyRevision(long rev, long ts) { - this.revision = rev; - this.timestamp = ts; - } - - public long getRevision() { - return revision; - } - - public long getExpireTimestamp() { - return timestamp; - } - - void setExpireTimestamp(long ts) { - timestamp = ts; - } - - @Override - public String toString() { - String description = "revision: " + revision + " ts: " + timestamp; - return description; - } - - @Override - public int compareTo(FamilyRevision o) { - long d = revision - o.getRevision(); - return (d < 0) ? -1 : (d > 0) ? 
1 : 0; - } - - -} diff --git hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/IDGenerator.java hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/IDGenerator.java deleted file mode 100644 index a427544..0000000 --- hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/IDGenerator.java +++ /dev/null @@ -1,145 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.hbase.snapshot; - -import java.io.IOException; -import java.nio.charset.Charset; - -import org.apache.hcatalog.hbase.snapshot.lock.LockListener; -import org.apache.hcatalog.hbase.snapshot.lock.WriteLock; -import org.apache.zookeeper.KeeperException; -import org.apache.zookeeper.ZooDefs.Ids; -import org.apache.zookeeper.ZooKeeper; -import org.apache.zookeeper.data.Stat; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - - -/** - * This class generates revision id's for transactions. 
- */ -class IDGenerator implements LockListener { - - private ZooKeeper zookeeper; - private String zNodeDataLoc; - private String zNodeLockBasePath; - private long id; - private static final Logger LOG = LoggerFactory.getLogger(IDGenerator.class); - - IDGenerator(ZooKeeper zookeeper, String tableName, String idGenNode) - throws IOException { - this.zookeeper = zookeeper; - this.zNodeDataLoc = idGenNode; - this.zNodeLockBasePath = PathUtil.getLockManagementNode(idGenNode); - } - - /** - * This method obtains a revision id for a transaction. - * - * @return revision ID - * @throws IOException - */ - public long obtainID() throws IOException { - WriteLock wLock = new WriteLock(zookeeper, zNodeLockBasePath, Ids.OPEN_ACL_UNSAFE); - wLock.setLockListener(this); - try { - boolean lockGrabbed = wLock.lock(); - if (lockGrabbed == false) { - //TO DO : Let this request queue up and try obtaining lock. - throw new IOException("Unable to obtain lock to obtain id."); - } else { - id = incrementAndReadCounter(); - } - } catch (KeeperException e) { - LOG.warn("Exception while obtaining lock for ID.", e); - throw new IOException("Exception while obtaining lock for ID.", e); - } catch (InterruptedException e) { - LOG.warn("Exception while obtaining lock for ID.", e); - throw new IOException("Exception while obtaining lock for ID.", e); - } finally { - wLock.unlock(); - } - return id; - } - - /** - * This method reads the latest revision ID that has been used. The ID - * returned by this method cannot be used for transaction. 
- * @return revision ID - * @throws IOException - */ - public long readID() throws IOException { - long curId; - try { - Stat stat = new Stat(); - byte[] data = zookeeper.getData(this.zNodeDataLoc, false, stat); - curId = Long.parseLong(new String(data, Charset.forName("UTF-8"))); - } catch (KeeperException e) { - LOG.warn("Exception while reading current revision id.", e); - throw new IOException("Exception while reading current revision id.", e); - } catch (InterruptedException e) { - LOG.warn("Exception while reading current revision id.", e); - throw new IOException("Exception while reading current revision id.", e); - } - - return curId; - } - - - private long incrementAndReadCounter() throws IOException { - - long curId, usedId; - try { - Stat stat = new Stat(); - byte[] data = zookeeper.getData(this.zNodeDataLoc, false, stat); - usedId = Long.parseLong((new String(data, Charset.forName("UTF-8")))); - curId = usedId + 1; - String lastUsedID = String.valueOf(curId); - zookeeper.setData(this.zNodeDataLoc, lastUsedID.getBytes(Charset.forName("UTF-8")), -1); - - } catch (KeeperException e) { - LOG.warn("Exception while incrementing revision id.", e); - throw new IOException("Exception while incrementing revision id. ", e); - } catch (InterruptedException e) { - LOG.warn("Exception while incrementing revision id.", e); - throw new IOException("Exception while incrementing revision id. 
", e); - } - - return curId; - } - - /* - * @see org.apache.hcatalog.hbase.snapshot.lock.LockListener#lockAcquired() - */ - @Override - public void lockAcquired() { - - - } - - /* - * @see org.apache.hcatalog.hbase.snapshot.lock.LockListener#lockReleased() - */ - @Override - public void lockReleased() { - - } - - -} diff --git hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/PathUtil.java hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/PathUtil.java deleted file mode 100644 index 993308b..0000000 --- hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/PathUtil.java +++ /dev/null @@ -1,132 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.hbase.snapshot; - - -/** - * The PathUtil class is a utility class to provide information about various - * znode paths. The following is the znode structure used for storing information. 
- * baseDir/ClockNode - * baseDir/TrasactionBasePath - * baseDir/TrasactionBasePath/TableA/revisionID - * baseDir/TrasactionBasePath/TableA/columnFamily-1 - * baseDir/TrasactionBasePath/TableA/columnFamily-1/runnningTxns - * baseDir/TrasactionBasePath/TableA/columnFamily-1/abortedTxns - * baseDir/TrasactionBasePath/TableB/revisionID - * baseDir/TrasactionBasePath/TableB/columnFamily-1 - * baseDir/TrasactionBasePath/TableB/columnFamily-1/runnningTxns - * baseDir/TrasactionBasePath/TableB/columnFamily-1/abortedTxns - - */ -public class PathUtil { - - static final String DATA_DIR = "/data"; - static final String CLOCK_NODE = "/clock"; - - /** - * This method returns the data path associated with the currently - * running transactions of a given table and column/column family. - * @param baseDir - * @param tableName - * @param columnFamily - * @return The path of the running transactions data. - */ - static String getRunningTxnInfoPath(String baseDir, String tableName, - String columnFamily) { - String txnBasePath = getTransactionBasePath(baseDir); - String path = txnBasePath + "/" + tableName + "/" + columnFamily - + "/runningTxns"; - return path; - } - - /** - * This method returns the data path associated with the aborted - * transactions of a given table and column/column family. - * @param baseDir The base directory for revision management. - * @param tableName The name of the table. - * @param columnFamily - * @return The path of the aborted transactions data. - */ - static String getAbortInformationPath(String baseDir, String tableName, - String columnFamily) { - String txnBasePath = getTransactionBasePath(baseDir); - String path = txnBasePath + "/" + tableName + "/" + columnFamily - + "/abortData"; - return path; - } - - /** - * Gets the revision id node for a given table. - * - * @param baseDir the base dir for revision management. - * @param tableName the table name - * @return the revision id node path. 
- */ - static String getRevisionIDNode(String baseDir, String tableName) { - String rmBasePath = getTransactionBasePath(baseDir); - String revisionIDNode = rmBasePath + "/" + tableName + "/idgen"; - return revisionIDNode; - } - - /** - * Gets the lock management node for any znode that needs to be locked. - * - * @param path the path of the znode. - * @return the lock management node path. - */ - static String getLockManagementNode(String path) { - String lockNode = path + "_locknode_"; - return lockNode; - } - - /** - * This method returns the base path for the transaction data. - * - * @param baseDir The base dir for revision management. - * @return The base path for the transaction data. - */ - static String getTransactionBasePath(String baseDir) { - String txnBaseNode = baseDir + DATA_DIR; - return txnBaseNode; - } - - /** - * Gets the txn data path for a given table. - * - * @param baseDir the base dir for revision management. - * @param tableName the table name - * @return the txn data path for the table. - */ - static String getTxnDataPath(String baseDir, String tableName) { - String txnBasePath = getTransactionBasePath(baseDir); - String path = txnBasePath + "/" + tableName; - return path; - } - - /** - * This method returns the data path for clock node. - * - * @param baseDir - * @return The data path for clock. - */ - static String getClockPath(String baseDir) { - String clockNode = baseDir + CLOCK_NODE; - return clockNode; - } -} diff --git hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/RMConstants.java hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/RMConstants.java deleted file mode 100644 index 4d6fa80..0000000 --- hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/RMConstants.java +++ /dev/null @@ -1,30 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.hbase.snapshot; - -public class RMConstants { - public static final String REVISION_MGR_ENDPOINT_IMPL_CLASS = "revision.manager.endpoint.impl.class"; - - public static final String WRITE_TRANSACTION_TIMEOUT = "revision.manager.writeTxn.timeout"; - - public static final String ZOOKEEPER_HOSTLIST = "revision.manager.zk.hostList"; - - public static final String ZOOKEEPER_DATADIR = "revision.manager.zk.dataDir"; -} diff --git hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/RevisionManager.java hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/RevisionManager.java deleted file mode 100644 index 4a6f842..0000000 --- hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/RevisionManager.java +++ /dev/null @@ -1,148 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.hbase.snapshot; - -import org.apache.hadoop.conf.Configuration; - -import java.io.IOException; -import java.util.List; - -/** - * This interface provides APIs for implementing revision management. - */ -public interface RevisionManager { - /** - * Version property required by HBase to use this interface - * for CoprocessorProtocol / RPC. - */ - public static final long VERSION = 1L; // do not change - - /** - * Initialize the revision manager. - */ - public void initialize(Configuration conf); - - /** - * Opens the revision manager. - * - * @throws IOException - */ - public void open() throws IOException; - - /** - * Closes the revision manager. - * - * @throws IOException - */ - public void close() throws IOException; - - /** - * Setup revision management for a newly created hbase table. - * @param table the hbase table name - * @param columnFamilies the column families in the table - */ - public void createTable(String table, List columnFamilies) throws IOException; - - /** - * Remove table data from revision manager for a dropped table. - * @param table the hbase table name - */ - public void dropTable(String table) throws IOException; - - /** - * Start the write transaction. - * - * @param table - * @param families - * @return a new Transaction - * @throws IOException - */ - public Transaction beginWriteTransaction(String table, List families) - throws IOException; - - /** - * Start the write transaction. 
- * - * @param table - * @param families - * @param keepAlive - * @return a new Transaction - * @throws IOException - */ - public Transaction beginWriteTransaction(String table, - List families, long keepAlive) throws IOException; - - /** - * Commit the write transaction. - * - * @param transaction - * @throws IOException - */ - public void commitWriteTransaction(Transaction transaction) - throws IOException; - - /** - * Abort the write transaction. - * - * @param transaction - * @throws IOException - */ - public void abortWriteTransaction(Transaction transaction) - throws IOException; - - /** - * Get the list of aborted Transactions for a column family - * - * @param table the table name - * @param columnFamily the column family name - * @return a list of aborted WriteTransactions - * @throws java.io.IOException - */ - public List getAbortedWriteTransactions(String table, - String columnFamily) throws IOException; - - /** - * Create the latest snapshot of the table. - * - * @param tableName - * @return a new snapshot - * @throws IOException - */ - public TableSnapshot createSnapshot(String tableName) throws IOException; - - /** - * Create the snapshot of the table using the revision number. - * - * @param tableName - * @param revision - * @return a new snapshot - * @throws IOException - */ - public TableSnapshot createSnapshot(String tableName, long revision) - throws IOException; - - /** - * Extends the expiration of a transaction by the time indicated by keep alive. 
- * - * @param transaction - * @throws IOException - */ - public void keepAlive(Transaction transaction) throws IOException; - -} diff --git hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/RevisionManagerConfiguration.java hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/RevisionManagerConfiguration.java deleted file mode 100644 index d5c4329..0000000 --- hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/RevisionManagerConfiguration.java +++ /dev/null @@ -1,59 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.hbase.snapshot; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hbase.HBaseConfiguration; - -public class RevisionManagerConfiguration { - - - public static Configuration addResources(Configuration conf) { - conf.addDefaultResource("revision-manager-default.xml"); - conf.addResource("revision-manager-site.xml"); - return conf; - } - - /** - * Creates a Configuration with Revision Manager resources - * @return a Configuration with Revision Manager resources - */ - public static Configuration create() { - Configuration conf = new Configuration(); - return addResources(conf); - } - - /** - * Creates a clone of passed configuration. - * @param that Configuration to clone. - * @return a Configuration created with the revision-manager-*.xml files plus - * the given configuration. - */ - public static Configuration create(final Configuration that) { - Configuration conf = create(); - //we need to merge things instead of doing new Configuration(that) - //because of a bug in Configuration wherein the config - //set on the MR fronted will get loaded on the backend as resouce called job.xml - //hence adding resources on the backed could potentially overwrite properties - //set on the frontend which we shouldn't be doing here - HBaseConfiguration.merge(conf, that); - return conf; - } -} diff --git hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/RevisionManagerEndpoint.java hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/RevisionManagerEndpoint.java deleted file mode 100644 index 49d9ad1..0000000 --- hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/RevisionManagerEndpoint.java +++ /dev/null @@ -1,141 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.hbase.snapshot; - -import java.io.IOException; -import java.util.List; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hbase.CoprocessorEnvironment; -import org.apache.hadoop.hbase.coprocessor.BaseEndpointCoprocessor; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Implementation of RevisionManager as HBase RPC endpoint. This class will control the lifecycle of - * and delegate to the actual RevisionManager implementation and make it available as a service - * hosted in the HBase region server (instead of running it in the client (storage handler). - * In the case of {@link ZKBasedRevisionManager} now only the region servers need write access to - * manage revision data. 
- */ -public class RevisionManagerEndpoint extends BaseEndpointCoprocessor implements RevisionManagerProtocol { - - private static final Logger LOGGER = - LoggerFactory.getLogger(RevisionManagerEndpoint.class.getName()); - - private RevisionManager rmImpl = null; - - @Override - public void start(CoprocessorEnvironment env) { - super.start(env); - try { - Configuration conf = RevisionManagerConfiguration.create(env.getConfiguration()); - String className = conf.get(RMConstants.REVISION_MGR_ENDPOINT_IMPL_CLASS, - ZKBasedRevisionManager.class.getName()); - LOGGER.debug("Using Revision Manager implementation: {}", className); - rmImpl = RevisionManagerFactory.getOpenedRevisionManager(className, conf); - } catch (IOException e) { - LOGGER.error("Failed to initialize revision manager", e); - } - } - - @Override - public void stop(CoprocessorEnvironment env) { - if (rmImpl != null) { - try { - rmImpl.close(); - } catch (IOException e) { - LOGGER.warn("Error closing revision manager.", e); - } - } - super.stop(env); - } - - @Override - public void initialize(Configuration conf) { - // do nothing, HBase controls life cycle - } - - @Override - public void open() throws IOException { - // do nothing, HBase controls life cycle - } - - @Override - public void close() throws IOException { - // do nothing, HBase controls life cycle - } - - @Override - public void createTable(String table, List columnFamilies) throws IOException { - rmImpl.createTable(table, columnFamilies); - } - - @Override - public void dropTable(String table) throws IOException { - rmImpl.dropTable(table); - } - - @Override - public Transaction beginWriteTransaction(String table, List families) - throws IOException { - return rmImpl.beginWriteTransaction(table, families); - } - - @Override - public Transaction beginWriteTransaction(String table, - List families, long keepAlive) throws IOException { - return rmImpl.beginWriteTransaction(table, families, keepAlive); - } - - @Override - public void 
commitWriteTransaction(Transaction transaction) - throws IOException { - rmImpl.commitWriteTransaction(transaction); - } - - @Override - public void abortWriteTransaction(Transaction transaction) - throws IOException { - rmImpl.abortWriteTransaction(transaction); - } - - @Override - public TableSnapshot createSnapshot(String tableName) throws IOException { - return rmImpl.createSnapshot(tableName); - } - - @Override - public TableSnapshot createSnapshot(String tableName, long revision) - throws IOException { - return rmImpl.createSnapshot(tableName, revision); - } - - @Override - public void keepAlive(Transaction transaction) throws IOException { - rmImpl.keepAlive(transaction); - } - - @Override - public List getAbortedWriteTransactions(String table, - String columnFamily) throws IOException { - return rmImpl.getAbortedWriteTransactions(table, columnFamily); - } - -} diff --git hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/RevisionManagerEndpointClient.java hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/RevisionManagerEndpointClient.java deleted file mode 100644 index c6ee50e..0000000 --- hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/RevisionManagerEndpointClient.java +++ /dev/null @@ -1,125 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.hbase.snapshot; - -import java.io.IOException; -import java.util.List; - -import org.apache.hadoop.conf.Configurable; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hbase.HConstants; -import org.apache.hadoop.hbase.client.HTable; -import org.apache.hadoop.hbase.util.Bytes; - -/** - * This class is nothing but a delegate for the enclosed proxy, - * which is created upon setting the configuration. - */ -public class RevisionManagerEndpointClient implements RevisionManager, Configurable { - - private Configuration conf = null; - private RevisionManager rmProxy; - - @Override - public Configuration getConf() { - return this.conf; - } - - @Override - public void setConf(Configuration arg0) { - this.conf = arg0; - } - - @Override - public void initialize(Configuration conf) { - // do nothing - } - - @Override - public void open() throws IOException { - // clone to adjust RPC settings unique to proxy - Configuration clonedConf = new Configuration(conf); - // conf.set("hbase.ipc.client.connect.max.retries", "0"); - // conf.setInt(HConstants.HBASE_CLIENT_RPC_MAXATTEMPTS, 1); - clonedConf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 1); // do not retry RPC - HTable table = new HTable(clonedConf, HConstants.ROOT_TABLE_NAME); - rmProxy = table.coprocessorProxy(RevisionManagerProtocol.class, - Bytes.toBytes("anyRow")); - rmProxy.open(); - } - - @Override - public void close() throws IOException { - rmProxy.close(); - } - - @Override - public void createTable(String table, List columnFamilies) throws 
IOException { - rmProxy.createTable(table, columnFamilies); - } - - @Override - public void dropTable(String table) throws IOException { - rmProxy.dropTable(table); - } - - @Override - public Transaction beginWriteTransaction(String table, List families) throws IOException { - return rmProxy.beginWriteTransaction(table, families); - } - - @Override - public Transaction beginWriteTransaction(String table, List families, long keepAlive) - throws IOException { - return rmProxy.beginWriteTransaction(table, families, keepAlive); - } - - @Override - public void commitWriteTransaction(Transaction transaction) throws IOException { - rmProxy.commitWriteTransaction(transaction); - } - - @Override - public void abortWriteTransaction(Transaction transaction) throws IOException { - rmProxy.abortWriteTransaction(transaction); - } - - @Override - public List getAbortedWriteTransactions(String table, String columnFamily) - throws IOException { - return rmProxy.getAbortedWriteTransactions(table, columnFamily); - } - - @Override - public TableSnapshot createSnapshot(String tableName) throws IOException { - return rmProxy.createSnapshot(tableName); - } - - @Override - public TableSnapshot createSnapshot(String tableName, long revision) throws IOException { - return rmProxy.createSnapshot(tableName, revision); - } - - @Override - public void keepAlive(Transaction transaction) throws IOException { - rmProxy.keepAlive(transaction); - } - -} diff --git hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/RevisionManagerFactory.java hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/RevisionManagerFactory.java deleted file mode 100644 index 2d3d3e4..0000000 --- hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/RevisionManagerFactory.java +++ /dev/null @@ -1,105 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.hbase.snapshot; - -import java.io.IOException; - -import org.apache.hadoop.conf.Configurable; -import org.apache.hadoop.conf.Configuration; - -/** - * Utility to instantiate the revision manager (not a true factory actually). - * Depends on HBase configuration to resolve ZooKeeper connection (when ZK is used). - */ -public class RevisionManagerFactory { - - public static final String REVISION_MGR_IMPL_CLASS = "revision.manager.impl.class"; - - /** - * Gets an instance of revision manager. - * - * @param conf The configuration required to created the revision manager. - * @return the revision manager An instance of revision manager. - * @throws IOException Signals that an I/O exception has occurred. 
- */ - private static RevisionManager getRevisionManager(String className, Configuration conf) throws IOException { - - RevisionManager revisionMgr; - ClassLoader classLoader = Thread.currentThread() - .getContextClassLoader(); - if (classLoader == null) { - classLoader = RevisionManagerFactory.class.getClassLoader(); - } - try { - Class revisionMgrClass = Class - .forName(className, true, classLoader).asSubclass(RevisionManager.class); - revisionMgr = (RevisionManager) revisionMgrClass.newInstance(); - revisionMgr.initialize(conf); - } catch (ClassNotFoundException e) { - throw new IOException( - "The implementation class of revision manager not found.", - e); - } catch (InstantiationException e) { - throw new IOException( - "Exception encountered during instantiating revision manager implementation.", - e); - } catch (IllegalAccessException e) { - throw new IOException( - "IllegalAccessException encountered during instantiating revision manager implementation.", - e); - } catch (IllegalArgumentException e) { - throw new IOException( - "IllegalArgumentException encountered during instantiating revision manager implementation.", - e); - } - return revisionMgr; - } - - /** - * Internally used by endpoint implementation to instantiate from different configuration setting. - * @param className - * @param conf - * @return the opened revision manager - * @throws IOException - */ - static RevisionManager getOpenedRevisionManager(String className, Configuration conf) throws IOException { - - RevisionManager revisionMgr = RevisionManagerFactory.getRevisionManager(className, conf); - if (revisionMgr instanceof Configurable) { - ((Configurable) revisionMgr).setConf(conf); - } - revisionMgr.open(); - return revisionMgr; - } - - /** - * Gets an instance of revision manager which is opened. - * The revision manager implementation can be specified as {@link #REVISION_MGR_IMPL_CLASS}, - * default is {@link ZKBasedRevisionManager}. 
- * @param conf revision manager configuration - * @return RevisionManager An instance of revision manager. - * @throws IOException - */ - public static RevisionManager getOpenedRevisionManager(Configuration conf) throws IOException { - String className = conf.get(RevisionManagerFactory.REVISION_MGR_IMPL_CLASS, - ZKBasedRevisionManager.class.getName()); - return getOpenedRevisionManager(className, conf); - } - -} diff --git hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/RevisionManagerProtocol.java hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/RevisionManagerProtocol.java deleted file mode 100644 index 4cbde74..0000000 --- hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/RevisionManagerProtocol.java +++ /dev/null @@ -1,30 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.hbase.snapshot; - -import org.apache.hadoop.hbase.ipc.CoprocessorProtocol; - -/** - * Interface marker to implement RevisionManager as Coprocessor. 
- * (needs to extend CoprocessorProtocol) - */ -public interface RevisionManagerProtocol extends RevisionManager, - CoprocessorProtocol { - -} diff --git hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/TableSnapshot.java hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/TableSnapshot.java deleted file mode 100644 index fa94157..0000000 --- hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/TableSnapshot.java +++ /dev/null @@ -1,90 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.hbase.snapshot; - -import java.io.Serializable; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; - -/** - * The snapshot for a table and a list of column families. 
- */ -public class TableSnapshot implements Serializable { - - private String name; - - private Map cfRevisionMap; - - private long latestRevision; - - - public TableSnapshot(String name, Map cfRevMap, long latestRevision) { - this.name = name; - if (cfRevMap == null) { - throw new IllegalArgumentException("revision map cannot be null"); - } - this.cfRevisionMap = cfRevMap; - this.latestRevision = latestRevision; - } - - /** - * Gets the table name. - * - * @return String The name of the table. - */ - public String getTableName() { - return name; - } - - /** - * Gets the column families. - * - * @return List A list of column families associated with the snapshot. - */ - public List getColumnFamilies(){ - return new ArrayList(this.cfRevisionMap.keySet()); - } - - /** - * Gets the revision. - * - * @param familyName The name of the column family. - * @return the revision - */ - public long getRevision(String familyName){ - if(cfRevisionMap.containsKey(familyName)) - return cfRevisionMap.get(familyName); - return latestRevision; - } - - /** - * @return the latest committed revision when this snapshot was taken - */ - public long getLatestRevision() { - return latestRevision; - } - - @Override - public String toString() { - String snapshot = "Table Name : " + name +" Latest Revision: " + latestRevision - + " Column Familiy revision : " + cfRevisionMap.toString(); - return snapshot; - } -} diff --git hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/Transaction.java hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/Transaction.java deleted file mode 100644 index 1d17ca5..0000000 --- hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/Transaction.java +++ /dev/null @@ -1,116 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.hbase.snapshot; - -import java.io.Serializable; -import java.util.ArrayList; -import java.util.List; - -/** - * This class is responsible for storing information related to - * transactions. - */ -public class Transaction implements Serializable { - - private String tableName; - private List columnFamilies = new ArrayList(); - private long timeStamp; - private long keepAlive; - private long revision; - - - Transaction(String tableName, List columnFamilies, long revision, long timestamp) { - this.tableName = tableName; - this.columnFamilies = columnFamilies; - this.timeStamp = timestamp; - this.revision = revision; - } - - /** - * @return The revision number associated with a transaction. - */ - public long getRevisionNumber() { - return this.revision; - } - - /** - * @return The table name associated with a transaction. - */ - public String getTableName() { - return tableName; - } - - /** - * @return The column families associated with a transaction. - */ - public List getColumnFamilies() { - return columnFamilies; - } - - /** - * @return The expire timestamp associated with a transaction. 
- */ - long getTransactionExpireTimeStamp() { - return this.timeStamp + this.keepAlive; - } - - void setKeepAlive(long seconds) { - this.keepAlive = seconds; - } - - /** - * Gets the keep alive value. - * - * @return long The keep alive value for the transaction. - */ - public long getKeepAliveValue() { - return this.keepAlive; - } - - /** - * Gets the family revision info. - * - * @return FamilyRevision An instance of FamilyRevision associated with the transaction. - */ - FamilyRevision getFamilyRevisionInfo() { - return new FamilyRevision(revision, getTransactionExpireTimeStamp()); - } - - /** - * Keep alive transaction. This methods extends the expire timestamp of a - * transaction by the "keep alive" amount. - */ - void keepAliveTransaction() { - this.timeStamp = this.timeStamp + this.keepAlive; - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder(); - sb.append("Revision : "); - sb.append(this.getRevisionNumber()); - sb.append(" Timestamp : "); - sb.append(this.getTransactionExpireTimeStamp()); - sb.append("\n").append("Table : "); - sb.append(this.tableName).append("\n"); - sb.append("Column Families : "); - sb.append(this.columnFamilies.toString()); - return sb.toString(); - } -} diff --git hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/ZKBasedRevisionManager.java hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/ZKBasedRevisionManager.java deleted file mode 100644 index f4556d1..0000000 --- hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/ZKBasedRevisionManager.java +++ /dev/null @@ -1,461 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.hbase.snapshot; - -import java.io.IOException; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hbase.HConstants; -import org.apache.hcatalog.hbase.snapshot.lock.LockListener; -import org.apache.hcatalog.hbase.snapshot.lock.WriteLock; -import org.apache.zookeeper.CreateMode; -import org.apache.zookeeper.KeeperException; -import org.apache.zookeeper.ZooDefs.Ids; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * The service for providing revision management to Hbase tables. 
- */ -public class ZKBasedRevisionManager implements RevisionManager { - - private static final Logger LOG = LoggerFactory.getLogger(ZKBasedRevisionManager.class); - private String zkHostList; - private String baseDir; - private ZKUtil zkUtil; - private long writeTxnTimeout; - - - /* - * @see org.apache.hcatalog.hbase.snapshot.RevisionManager#initialize() - */ - @Override - public void initialize(Configuration conf) { - conf = new Configuration(conf); - if (conf.get(RMConstants.ZOOKEEPER_HOSTLIST) == null) { - String zkHostList = conf.get(HConstants.ZOOKEEPER_QUORUM); - int port = conf.getInt(HConstants.ZOOKEEPER_CLIENT_PORT, - HConstants.DEFAULT_ZOOKEPER_CLIENT_PORT); - String[] splits = zkHostList.split(","); - StringBuffer sb = new StringBuffer(); - for (String split : splits) { - sb.append(split); - sb.append(':'); - sb.append(port); - sb.append(','); - } - sb.deleteCharAt(sb.length() - 1); - conf.set(RMConstants.ZOOKEEPER_HOSTLIST, sb.toString()); - } - this.zkHostList = conf.get(RMConstants.ZOOKEEPER_HOSTLIST); - this.baseDir = conf.get(RMConstants.ZOOKEEPER_DATADIR); - this.writeTxnTimeout = Long.parseLong(conf.get(RMConstants.WRITE_TRANSACTION_TIMEOUT)); - } - - /** - * Open a ZooKeeper connection - * @throws java.io.IOException - */ - - public void open() throws IOException { - zkUtil = new ZKUtil(zkHostList, this.baseDir); - zkUtil.createRootZNodes(); - LOG.info("Created root znodes for revision manager."); - } - - /** - * Close Zookeeper connection - */ - public void close() { - zkUtil.closeZKConnection(); - } - - private void checkInputParams(String table, List families) { - if (table == null) { - throw new IllegalArgumentException( - "The table name must be specified for reading."); - } - if (families == null || families.isEmpty()) { - throw new IllegalArgumentException( - "At least one column family should be specified for reading."); - } - } - - @Override - public void createTable(String table, List columnFamilies) throws IOException { - 
zkUtil.createRootZNodes(); - zkUtil.setUpZnodesForTable(table, columnFamilies); - } - - @Override - public void dropTable(String table) throws IOException { - zkUtil.deleteZNodes(table); - } - - /* @param table - /* @param families - /* @param keepAlive - /* @return - /* @throws IOException - * @see org.apache.hcatalog.hbase.snapshot.RevisionManager#beginWriteTransaction(java.lang.String, java.util.List, long) - */ - public Transaction beginWriteTransaction(String table, - List families, long keepAlive) throws IOException { - - checkInputParams(table, families); - zkUtil.setUpZnodesForTable(table, families); - long nextId = zkUtil.nextId(table); - long expireTimestamp = zkUtil.getTimeStamp(); - Transaction transaction = new Transaction(table, families, nextId, - expireTimestamp); - if (keepAlive != -1) { - transaction.setKeepAlive(keepAlive); - } else { - transaction.setKeepAlive(writeTxnTimeout); - } - - refreshTransactionList(transaction.getTableName()); - String lockPath = prepareLockNode(table); - WriteLock wLock = new WriteLock(zkUtil.getSession(), lockPath, - Ids.OPEN_ACL_UNSAFE); - RMLockListener myLockListener = new RMLockListener(); - wLock.setLockListener(myLockListener); - try { - boolean lockGrabbed = wLock.lock(); - if (lockGrabbed == false) { - //TO DO : Let this request queue up and try obtaining lock. - throw new IOException( - "Unable to obtain lock while beginning transaction. 
" - + transaction.toString()); - } else { - List colFamilies = transaction.getColumnFamilies(); - FamilyRevision revisionData = transaction.getFamilyRevisionInfo(); - for (String cfamily : colFamilies) { - String path = PathUtil.getRunningTxnInfoPath( - baseDir, table, cfamily); - zkUtil.updateData(path, revisionData, - ZKUtil.UpdateMode.APPEND); - } - } - } catch (KeeperException e) { - throw new IOException("Exception while obtaining lock.", e); - } catch (InterruptedException e) { - throw new IOException("Exception while obtaining lock.", e); - } finally { - wLock.unlock(); - } - - return transaction; - } - - /* @param table The table name. - /* @param families The column families involved in the transaction. - /* @return transaction The transaction which was started. - /* @throws IOException - * @see org.apache.hcatalog.hbase.snapshot.RevisionManager#beginWriteTransaction(java.lang.String, java.util.List) - */ - public Transaction beginWriteTransaction(String table, List families) - throws IOException { - return beginWriteTransaction(table, families, -1); - } - - /** - * This method commits a write transaction. - * @param transaction The revision information associated with transaction. - * @throws java.io.IOException - */ - public void commitWriteTransaction(Transaction transaction) throws IOException { - refreshTransactionList(transaction.getTableName()); - - String lockPath = prepareLockNode(transaction.getTableName()); - WriteLock wLock = new WriteLock(zkUtil.getSession(), lockPath, - Ids.OPEN_ACL_UNSAFE); - RMLockListener myLockListener = new RMLockListener(); - wLock.setLockListener(myLockListener); - try { - boolean lockGrabbed = wLock.lock(); - if (lockGrabbed == false) { - //TO DO : Let this request queue up and try obtaining lock. - throw new IOException( - "Unable to obtain lock while commiting transaction. 
" - + transaction.toString()); - } else { - String tableName = transaction.getTableName(); - List colFamilies = transaction.getColumnFamilies(); - FamilyRevision revisionData = transaction.getFamilyRevisionInfo(); - for (String cfamily : colFamilies) { - String path = PathUtil.getRunningTxnInfoPath( - baseDir, tableName, cfamily); - zkUtil.updateData(path, revisionData, - ZKUtil.UpdateMode.REMOVE); - } - - } - } catch (KeeperException e) { - throw new IOException("Exception while obtaining lock.", e); - } catch (InterruptedException e) { - throw new IOException("Exception while obtaining lock.", e); - } finally { - wLock.unlock(); - } - LOG.info("Write Transaction committed: " + transaction.toString()); - } - - /** - * This method aborts a write transaction. - * @param transaction - * @throws java.io.IOException - */ - public void abortWriteTransaction(Transaction transaction) throws IOException { - - refreshTransactionList(transaction.getTableName()); - String lockPath = prepareLockNode(transaction.getTableName()); - WriteLock wLock = new WriteLock(zkUtil.getSession(), lockPath, - Ids.OPEN_ACL_UNSAFE); - RMLockListener myLockListener = new RMLockListener(); - wLock.setLockListener(myLockListener); - try { - boolean lockGrabbed = wLock.lock(); - if (lockGrabbed == false) { - //TO DO : Let this request queue up and try obtaining lock. - throw new IOException( - "Unable to obtain lock while aborting transaction. 
" - + transaction.toString()); - } else { - String tableName = transaction.getTableName(); - List colFamilies = transaction.getColumnFamilies(); - FamilyRevision revisionData = transaction - .getFamilyRevisionInfo(); - for (String cfamily : colFamilies) { - String path = PathUtil.getRunningTxnInfoPath( - baseDir, tableName, cfamily); - zkUtil.updateData(path, revisionData, - ZKUtil.UpdateMode.REMOVE); - path = PathUtil.getAbortInformationPath(baseDir, - tableName, cfamily); - zkUtil.updateData(path, revisionData, - ZKUtil.UpdateMode.APPEND); - } - - } - } catch (KeeperException e) { - throw new IOException("Exception while obtaining lock.", e); - } catch (InterruptedException e) { - throw new IOException("Exception while obtaining lock.", e); - } finally { - wLock.unlock(); - } - LOG.info("Write Transaction aborted: " + transaction.toString()); - } - - - /* @param transaction - /* @throws IOException - * @see org.apache.hcatalog.hbase.snapshot.RevsionManager#keepAlive(org.apache.hcatalog.hbase.snapshot.Transaction) - */ - public void keepAlive(Transaction transaction) - throws IOException { - - refreshTransactionList(transaction.getTableName()); - transaction.keepAliveTransaction(); - String lockPath = prepareLockNode(transaction.getTableName()); - WriteLock wLock = new WriteLock(zkUtil.getSession(), lockPath, - Ids.OPEN_ACL_UNSAFE); - RMLockListener myLockListener = new RMLockListener(); - wLock.setLockListener(myLockListener); - try { - boolean lockGrabbed = wLock.lock(); - if (lockGrabbed == false) { - //TO DO : Let this request queue up and try obtaining lock. - throw new IOException( - "Unable to obtain lock for keep alive of transaction. 
" - + transaction.toString()); - } else { - String tableName = transaction.getTableName(); - List colFamilies = transaction.getColumnFamilies(); - FamilyRevision revisionData = transaction.getFamilyRevisionInfo(); - for (String cfamily : colFamilies) { - String path = PathUtil.getRunningTxnInfoPath( - baseDir, tableName, cfamily); - zkUtil.updateData(path, revisionData, - ZKUtil.UpdateMode.KEEP_ALIVE); - } - - } - } catch (KeeperException e) { - throw new IOException("Exception while obtaining lock.", e); - } catch (InterruptedException e) { - throw new IOException("Exception while obtaining lock.", e); - } finally { - wLock.unlock(); - } - - } - - /* This method allows the user to create latest snapshot of a - /* table. - /* @param tableName The table whose snapshot is being created. - /* @return TableSnapshot An instance of TableSnaphot - /* @throws IOException - * @see org.apache.hcatalog.hbase.snapshot.RevsionManager#createSnapshot(java.lang.String) - */ - public TableSnapshot createSnapshot(String tableName) throws IOException { - refreshTransactionList(tableName); - long latestID = zkUtil.currentID(tableName); - HashMap cfMap = new HashMap(); - List columnFamilyNames = zkUtil.getColumnFamiliesOfTable(tableName); - - for (String cfName : columnFamilyNames) { - String cfPath = PathUtil.getRunningTxnInfoPath(baseDir, tableName, cfName); - List tranxList = zkUtil.getTransactionList(cfPath); - long version; - if (!tranxList.isEmpty()) { - Collections.sort(tranxList); - // get the smallest running Transaction ID - long runningVersion = tranxList.get(0).getRevision(); - version = runningVersion - 1; - } else { - version = latestID; - } - cfMap.put(cfName, version); - } - - TableSnapshot snapshot = new TableSnapshot(tableName, cfMap, latestID); - LOG.debug("Created snapshot For table: " + tableName + " snapshot: " + snapshot); - return snapshot; - } - - /* This method allows the user to create snapshot of a - /* table with a given revision number. 
- /* @param tableName - /* @param revision - /* @return TableSnapshot - /* @throws IOException - * @see org.apache.hcatalog.hbase.snapshot.RevsionManager#createSnapshot(java.lang.String, long) - */ - public TableSnapshot createSnapshot(String tableName, long revision) throws IOException { - - long currentID = zkUtil.currentID(tableName); - if (revision > currentID) { - throw new IOException( - "The revision specified in the snapshot is higher than the current revision of the table."); - } - refreshTransactionList(tableName); - HashMap cfMap = new HashMap(); - List columnFamilies = zkUtil.getColumnFamiliesOfTable(tableName); - - for (String cf : columnFamilies) { - cfMap.put(cf, revision); - } - - return new TableSnapshot(tableName, cfMap, revision); - } - - /** - * Get the list of in-progress Transactions for a column family - * @param table the table name - * @param columnFamily the column family name - * @return a list of in-progress WriteTransactions - * @throws java.io.IOException - */ - List getRunningTransactions(String table, - String columnFamily) throws IOException { - String path = PathUtil.getRunningTxnInfoPath(baseDir, table, - columnFamily); - return zkUtil.getTransactionList(path); - } - - @Override - public List getAbortedWriteTransactions(String table, - String columnFamily) throws IOException { - String path = PathUtil.getAbortInformationPath(baseDir, table, columnFamily); - return zkUtil.getTransactionList(path); - } - - private void refreshTransactionList(String tableName) throws IOException { - String lockPath = prepareLockNode(tableName); - WriteLock wLock = new WriteLock(zkUtil.getSession(), lockPath, - Ids.OPEN_ACL_UNSAFE); - RMLockListener myLockListener = new RMLockListener(); - wLock.setLockListener(myLockListener); - try { - boolean lockGrabbed = wLock.lock(); - if (lockGrabbed == false) { - //TO DO : Let this request queue up and try obtaining lock. 
- throw new IOException( - "Unable to obtain lock while refreshing transactions of table " - + tableName + "."); - } else { - List cfPaths = zkUtil - .getColumnFamiliesOfTable(tableName); - for (String cf : cfPaths) { - String runningDataPath = PathUtil.getRunningTxnInfoPath( - baseDir, tableName, cf); - zkUtil.refreshTransactions(runningDataPath); - } - - } - } catch (KeeperException e) { - throw new IOException("Exception while obtaining lock.", e); - } catch (InterruptedException e) { - throw new IOException("Exception while obtaining lock.", e); - } finally { - wLock.unlock(); - } - - } - - private String prepareLockNode(String tableName) throws IOException { - String txnDataPath = PathUtil.getTxnDataPath(this.baseDir, tableName); - String lockPath = PathUtil.getLockManagementNode(txnDataPath); - zkUtil.ensurePathExists(lockPath, null, Ids.OPEN_ACL_UNSAFE, - CreateMode.PERSISTENT); - return lockPath; - } - - /* - * This class is a listener class for the locks used in revision management. - * TBD: Use the following class to signal that that the lock is actually - * been granted. - */ - class RMLockListener implements LockListener { - - /* - * @see org.apache.hcatalog.hbase.snapshot.lock.LockListener#lockAcquired() - */ - @Override - public void lockAcquired() { - - } - - /* - * @see org.apache.hcatalog.hbase.snapshot.lock.LockListener#lockReleased() - */ - @Override - public void lockReleased() { - - } - - } - - -} diff --git hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/ZKUtil.java hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/ZKUtil.java deleted file mode 100644 index 58fd435..0000000 --- hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/ZKUtil.java +++ /dev/null @@ -1,525 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.hbase.snapshot; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; - -import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hcatalog.hbase.snapshot.transaction.thrift.StoreFamilyRevision; -import org.apache.hcatalog.hbase.snapshot.transaction.thrift.StoreFamilyRevisionList; -import org.apache.thrift.TBase; -import org.apache.thrift.TDeserializer; -import org.apache.thrift.TSerializer; -import org.apache.thrift.protocol.TBinaryProtocol; -import org.apache.zookeeper.CreateMode; -import org.apache.zookeeper.KeeperException; -import org.apache.zookeeper.WatchedEvent; -import org.apache.zookeeper.Watcher; -import org.apache.zookeeper.ZooDefs.Ids; -import org.apache.zookeeper.ZooKeeper; -import org.apache.zookeeper.ZooKeeper.States; -import org.apache.zookeeper.data.ACL; -import org.apache.zookeeper.data.Stat; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -class ZKUtil { - - private int DEFAULT_SESSION_TIMEOUT = 1000000; - private ZooKeeper zkSession; - private String baseDir; - private String connectString; - private static final Logger LOG = LoggerFactory.getLogger(ZKUtil.class); - - static enum UpdateMode { - APPEND, REMOVE, KEEP_ALIVE - } - - ; - - 
ZKUtil(String connection, String baseDir) { - this.connectString = connection; - this.baseDir = baseDir; - } - - /** - * This method creates znodes related to table. - * - * @param table The name of the table. - * @param families The list of column families of the table. - * @throws IOException - */ - void setUpZnodesForTable(String table, List families) - throws IOException { - - String transactionDataTablePath = PathUtil.getTxnDataPath(baseDir, table); - ensurePathExists(transactionDataTablePath, null, Ids.OPEN_ACL_UNSAFE, - CreateMode.PERSISTENT); - for (String cf : families) { - String runningDataPath = PathUtil.getRunningTxnInfoPath( - this.baseDir, table, cf); - ensurePathExists(runningDataPath, null, Ids.OPEN_ACL_UNSAFE, - CreateMode.PERSISTENT); - String abortDataPath = PathUtil.getAbortInformationPath( - this.baseDir, table, cf); - ensurePathExists(abortDataPath, null, Ids.OPEN_ACL_UNSAFE, - CreateMode.PERSISTENT); - } - - } - - /** - * This method ensures that a given path exists in zookeeper. If the path - * does not exists, it creates one. - * - * @param path The path of znode that is required to exist. - * @param data The data to be associated with the znode. - * @param acl The ACLs required. - * @param flags The CreateMode for the znode. - * @throws IOException - */ - void ensurePathExists(String path, byte[] data, List acl, - CreateMode flags) throws IOException { - String[] dirs = path.split("/"); - String parentPath = ""; - for (String subDir : dirs) { - if (subDir.equals("") == false) { - parentPath = parentPath + "/" + subDir; - try { - Stat stat = getSession().exists(parentPath, false); - if (stat == null) { - getSession().create(parentPath, data, acl, flags); - } - } catch (Exception e) { - throw new IOException("Exception while creating path " - + parentPath, e); - } - } - } - - } - - /** - * This method returns a list of columns of a table which were used in any - * of the transactions. - * - * @param tableName The name of table. 
- * @return List The list of column families in table. - * @throws IOException - */ - List getColumnFamiliesOfTable(String tableName) throws IOException { - String path = PathUtil.getTxnDataPath(baseDir, tableName); - List children = null; - List columnFamlies = new ArrayList(); - try { - children = getSession().getChildren(path, false); - } catch (KeeperException e) { - LOG.warn("Caught: ", e); - throw new IOException("Exception while obtaining columns of table.", e); - } catch (InterruptedException e) { - LOG.warn("Caught: ", e); - throw new IOException("Exception while obtaining columns of table.", e); - } - - for (String child : children) { - if ((child.contains("idgen") == false) - && (child.contains("_locknode_") == false)) { - columnFamlies.add(child); - } - } - return columnFamlies; - } - - /** - * This method returns a time stamp for use by the transactions. - * - * @return long The current timestamp in zookeeper. - * @throws IOException - */ - long getTimeStamp() throws IOException { - long timeStamp; - Stat stat; - String clockPath = PathUtil.getClockPath(this.baseDir); - ensurePathExists(clockPath, null, Ids.OPEN_ACL_UNSAFE, - CreateMode.PERSISTENT); - try { - getSession().exists(clockPath, false); - stat = getSession().setData(clockPath, null, -1); - - } catch (KeeperException e) { - LOG.warn("Caught: ", e); - throw new IOException("Exception while obtaining timestamp ", e); - } catch (InterruptedException e) { - LOG.warn("Caught: ", e); - throw new IOException("Exception while obtaining timestamp ", e); - } - timeStamp = stat.getMtime(); - return timeStamp; - } - - /** - * This method returns the next revision number to be used for any - * transaction purposes. - * - * @param tableName The name of the table. - * @return revision number The revision number last used by any transaction. 
- * @throws IOException - */ - long nextId(String tableName) throws IOException { - String idNode = PathUtil.getRevisionIDNode(this.baseDir, tableName); - ensurePathExists(idNode, Bytes.toBytes("0"), Ids.OPEN_ACL_UNSAFE, - CreateMode.PERSISTENT); - String lockNode = PathUtil.getLockManagementNode(idNode); - ensurePathExists(lockNode, null, Ids.OPEN_ACL_UNSAFE, - CreateMode.PERSISTENT); - IDGenerator idf = new IDGenerator(getSession(), tableName, idNode); - long id = idf.obtainID(); - return id; - } - - /** - * The latest used revision id of the table. - * - * @param tableName The name of the table. - * @return the long The revision number to use by any transaction. - * @throws IOException Signals that an I/O exception has occurred. - */ - long currentID(String tableName) throws IOException { - String idNode = PathUtil.getRevisionIDNode(this.baseDir, tableName); - ensurePathExists(idNode, Bytes.toBytes("0"), Ids.OPEN_ACL_UNSAFE, - CreateMode.PERSISTENT); - String lockNode = PathUtil.getLockManagementNode(idNode); - ensurePathExists(lockNode, null, Ids.OPEN_ACL_UNSAFE, - CreateMode.PERSISTENT); - IDGenerator idf = new IDGenerator(getSession(), tableName, idNode); - long id = idf.readID(); - return id; - } - - /** - * This methods retrieves the list of transaction information associated - * with each column/column family of a table. - * - * @param path The znode path - * @return List of FamilyRevision The list of transactions in the given path. 
- * @throws IOException - */ - List getTransactionList(String path) - throws IOException { - - byte[] data = getRawData(path, new Stat()); - ArrayList wtxnList = new ArrayList(); - if (data == null) { - return wtxnList; - } - StoreFamilyRevisionList txnList = new StoreFamilyRevisionList(); - deserialize(txnList, data); - Iterator itr = txnList.getRevisionListIterator(); - - while (itr.hasNext()) { - StoreFamilyRevision wtxn = itr.next(); - wtxnList.add(new FamilyRevision(wtxn.getRevision(), wtxn - .getTimestamp())); - } - - return wtxnList; - } - - /** - * This method returns the data associated with the path in zookeeper. - * - * @param path The znode path - * @param stat Zookeeper stat - * @return byte array The data stored in the znode. - * @throws IOException - */ - byte[] getRawData(String path, Stat stat) throws IOException { - byte[] data = null; - try { - data = getSession().getData(path, false, stat); - } catch (Exception e) { - throw new IOException( - "Exception while obtaining raw data from zookeeper path " - + path, e); - } - return data; - } - - /** - * This method created the basic znodes in zookeeper for revision - * management. - * - * @throws IOException - */ - void createRootZNodes() throws IOException { - String txnBaseNode = PathUtil.getTransactionBasePath(this.baseDir); - String clockNode = PathUtil.getClockPath(this.baseDir); - ensurePathExists(txnBaseNode, null, Ids.OPEN_ACL_UNSAFE, - CreateMode.PERSISTENT); - ensurePathExists(clockNode, null, Ids.OPEN_ACL_UNSAFE, - CreateMode.PERSISTENT); - } - - /** - * This method closes the zookeeper session. - */ - void closeZKConnection() { - if (zkSession != null) { - try { - zkSession.close(); - } catch (InterruptedException e) { - LOG.warn("Close failed: ", e); - } - zkSession = null; - LOG.info("Disconnected to ZooKeeper"); - } - } - - /** - * This method returns a zookeeper session. If the current session is closed, - * then a new session is created. 
- * - * @return ZooKeeper An instance of zookeeper client. - * @throws IOException - */ - ZooKeeper getSession() throws IOException { - if (zkSession == null || zkSession.getState() == States.CLOSED) { - synchronized (this) { - if (zkSession == null || zkSession.getState() == States.CLOSED) { - zkSession = new ZooKeeper(this.connectString, - this.DEFAULT_SESSION_TIMEOUT, new ZKWatcher()); - while (zkSession.getState() == States.CONNECTING) { - try { - Thread.sleep(1000); - } catch (InterruptedException e) { - } - } - } - } - } - return zkSession; - } - - /** - * This method updates the transaction data related to a znode. - * - * @param path The path to the transaction data. - * @param updateTx The FamilyRevision to be updated. - * @param mode The mode to update like append, update, remove. - * @throws IOException - */ - void updateData(String path, FamilyRevision updateTx, UpdateMode mode) - throws IOException { - - if (updateTx == null) { - throw new IOException( - "The transaction to be updated found to be null."); - } - List currentData = getTransactionList(path); - List newData = new ArrayList(); - boolean dataFound = false; - long updateVersion = updateTx.getRevision(); - for (FamilyRevision tranx : currentData) { - if (tranx.getRevision() != updateVersion) { - newData.add(tranx); - } else { - dataFound = true; - } - } - switch (mode) { - case REMOVE: - if (dataFound == false) { - throw new IOException( - "The transaction to be removed not found in the data."); - } - LOG.info("Removed trasaction : " + updateTx.toString()); - break; - case KEEP_ALIVE: - if (dataFound == false) { - throw new IOException( - "The transaction to be kept alove not found in the data. 
It might have been expired."); - } - newData.add(updateTx); - LOG.info("keep alive of transaction : " + updateTx.toString()); - break; - case APPEND: - if (dataFound == true) { - throw new IOException( - "The data to be appended already exists."); - } - newData.add(updateTx); - LOG.info("Added transaction : " + updateTx.toString()); - break; - } - - // For serialization purposes. - List newTxnList = new ArrayList(); - for (FamilyRevision wtxn : newData) { - StoreFamilyRevision newTxn = new StoreFamilyRevision(wtxn.getRevision(), - wtxn.getExpireTimestamp()); - newTxnList.add(newTxn); - } - StoreFamilyRevisionList wtxnList = new StoreFamilyRevisionList(newTxnList); - byte[] newByteData = serialize(wtxnList); - - Stat stat = null; - try { - stat = zkSession.setData(path, newByteData, -1); - } catch (KeeperException e) { - throw new IOException( - "Exception while updating trasactional data. ", e); - } catch (InterruptedException e) { - throw new IOException( - "Exception while updating trasactional data. ", e); - } - - if (stat != null) { - LOG.info("Transaction list stored at " + path + "."); - } - - } - - /** - * Refresh transactions on a given transaction data path. - * - * @param path The path to the transaction data. - * @throws IOException Signals that an I/O exception has occurred. 
- */ - void refreshTransactions(String path) throws IOException { - List currentData = getTransactionList(path); - List newData = new ArrayList(); - - for (FamilyRevision tranx : currentData) { - if (tranx.getExpireTimestamp() > getTimeStamp()) { - newData.add(tranx); - } - } - - if (newData.equals(currentData) == false) { - List newTxnList = new ArrayList(); - for (FamilyRevision wtxn : newData) { - StoreFamilyRevision newTxn = new StoreFamilyRevision(wtxn.getRevision(), - wtxn.getExpireTimestamp()); - newTxnList.add(newTxn); - } - StoreFamilyRevisionList wtxnList = new StoreFamilyRevisionList(newTxnList); - byte[] newByteData = serialize(wtxnList); - - try { - zkSession.setData(path, newByteData, -1); - } catch (KeeperException e) { - throw new IOException( - "Exception while updating trasactional data. ", e); - } catch (InterruptedException e) { - throw new IOException( - "Exception while updating trasactional data. ", e); - } - - } - - } - - /** - * Delete table znodes. - * - * @param tableName the hbase table name - * @throws IOException Signals that an I/O exception has occurred. - */ - void deleteZNodes(String tableName) throws IOException { - String transactionDataTablePath = PathUtil.getTxnDataPath(baseDir, - tableName); - deleteRecursively(transactionDataTablePath); - } - - void deleteRecursively(String path) throws IOException { - try { - List children = getSession().getChildren(path, false); - if (children.size() != 0) { - for (String child : children) { - deleteRecursively(path + "/" + child); - } - } - getSession().delete(path, -1); - } catch (KeeperException e) { - throw new IOException( - "Exception while deleting path " + path + ".", e); - } catch (InterruptedException e) { - throw new IOException( - "Exception while deleting path " + path + ".", e); - } - } - - /** - * This method serializes a given instance of TBase object. - * - * @param obj An instance of TBase - * @return byte array The serialized data. 
- * @throws IOException - */ - static byte[] serialize(TBase obj) throws IOException { - if (obj == null) - return new byte[0]; - try { - TSerializer serializer = new TSerializer( - new TBinaryProtocol.Factory()); - byte[] bytes = serializer.serialize(obj); - return bytes; - } catch (Exception e) { - throw new IOException("Serialization error: ", e); - } - } - - - /** - * This method deserializes the given byte array into the TBase object. - * - * @param obj An instance of TBase - * @param data Output of deserialization. - * @throws IOException - */ - static void deserialize(TBase obj, byte[] data) throws IOException { - if (data == null || data.length == 0) - return; - try { - TDeserializer deserializer = new TDeserializer( - new TBinaryProtocol.Factory()); - deserializer.deserialize(obj, data); - } catch (Exception e) { - throw new IOException("Deserialization error: " + e.getMessage(), e); - } - } - - private class ZKWatcher implements Watcher { - public void process(WatchedEvent event) { - switch (event.getState()) { - case Expired: - LOG.info("The client session has expired. Try opening a new " - + "session and connecting again."); - zkSession = null; - break; - default: - - } - } - } - -} diff --git hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/lock/LockListener.java hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/lock/LockListener.java deleted file mode 100644 index 3c5f95b..0000000 --- hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/lock/LockListener.java +++ /dev/null @@ -1,41 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.hbase.snapshot.lock; - -/** - * This class has two methods which are call - * back methods when a lock is acquired and - * when the lock is released. - * This class has been used as-is from the zookeeper 3.3.4 recipes minor changes - * in the package name. - */ -public interface LockListener { - /** - * call back called when the lock - * is acquired - */ - public void lockAcquired(); - - /** - * call back called when the lock is - * released. - */ - public void lockReleased(); -} diff --git hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/lock/ProtocolSupport.java hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/lock/ProtocolSupport.java deleted file mode 100644 index 0f97589..0000000 --- hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/lock/ProtocolSupport.java +++ /dev/null @@ -1,195 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.hbase.snapshot.lock; - -import org.apache.zookeeper.CreateMode; -import org.apache.zookeeper.KeeperException; -import org.apache.zookeeper.ZooDefs; -import org.apache.zookeeper.ZooKeeper; -import org.apache.zookeeper.data.ACL; -import org.apache.zookeeper.data.Stat; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.List; -import java.util.concurrent.atomic.AtomicBoolean; - -/** - * A base class for protocol implementations which provides a number of higher - * level helper methods for working with ZooKeeper along with retrying synchronous - * operations if the connection to ZooKeeper closes such as - * {@link #retryOperation(ZooKeeperOperation)} - * This class has been used as-is from the zookeeper 3.4.0 recipes with - * changes in the retry delay, retry count values and package name. 
- */ -class ProtocolSupport { - private static final Logger LOG = LoggerFactory.getLogger(ProtocolSupport.class); - - protected final ZooKeeper zookeeper; - private AtomicBoolean closed = new AtomicBoolean(false); - private long retryDelay = 500L; - private int retryCount = 3; - private List acl = ZooDefs.Ids.OPEN_ACL_UNSAFE; - - public ProtocolSupport(ZooKeeper zookeeper) { - this.zookeeper = zookeeper; - } - - /** - * Closes this strategy and releases any ZooKeeper resources; but keeps the - * ZooKeeper instance open - */ - public void close() { - if (closed.compareAndSet(false, true)) { - doClose(); - } - } - - /** - * return zookeeper client instance - * @return zookeeper client instance - */ - public ZooKeeper getZookeeper() { - return zookeeper; - } - - /** - * return the acl its using - * @return the acl. - */ - public List getAcl() { - return acl; - } - - /** - * set the acl - * @param acl the acl to set to - */ - public void setAcl(List acl) { - this.acl = acl; - } - - /** - * get the retry delay in milliseconds - * @return the retry delay - */ - public long getRetryDelay() { - return retryDelay; - } - - /** - * Sets the time waited between retry delays - * @param retryDelay the retry delay - */ - public void setRetryDelay(long retryDelay) { - this.retryDelay = retryDelay; - } - - /** - * Allow derived classes to perform - * some custom closing operations to release resources - */ - protected void doClose() { - } - - - /** - * Perform the given operation, retrying if the connection fails - * @return object. it needs to be cast to the callee's expected - * return type. 
- */ - protected Object retryOperation(ZooKeeperOperation operation) - throws KeeperException, InterruptedException { - KeeperException exception = null; - for (int i = 0; i < retryCount; i++) { - try { - return operation.execute(); - } catch (KeeperException.SessionExpiredException e) { - LOG.warn("Session expired for: " + zookeeper + " so reconnecting due to: " + e, e); - throw e; - } catch (KeeperException.ConnectionLossException e) { - if (exception == null) { - exception = e; - } - LOG.debug("Attempt " + i + " failed with connection loss so " + - "attempting to reconnect: " + e, e); - retryDelay(i); - } - } - throw exception; - } - - /** - * Ensures that the given path exists with no data, the current - * ACL and no flags - * @param path - */ - protected void ensurePathExists(String path) { - ensureExists(path, null, acl, CreateMode.PERSISTENT); - } - - /** - * Ensures that the given path exists with the given data, ACL and flags - * @param path - * @param acl - * @param flags - */ - protected void ensureExists(final String path, final byte[] data, - final List acl, final CreateMode flags) { - try { - retryOperation(new ZooKeeperOperation() { - public boolean execute() throws KeeperException, InterruptedException { - Stat stat = zookeeper.exists(path, false); - if (stat != null) { - return true; - } - zookeeper.create(path, data, acl, flags); - return true; - } - }); - } catch (KeeperException e) { - LOG.warn("Caught: " + e, e); - } catch (InterruptedException e) { - LOG.warn("Caught: " + e, e); - } - } - - /** - * Returns true if this protocol has been closed - * @return true if this protocol is closed - */ - protected boolean isClosed() { - return closed.get(); - } - - /** - * Performs a retry delay if this is not the first attempt - * @param attemptCount the number of the attempts performed so far - */ - protected void retryDelay(int attemptCount) { - if (attemptCount > 0) { - try { - Thread.sleep(attemptCount * retryDelay); - } catch (InterruptedException 
e) { - LOG.debug("Failed to sleep: " + e, e); - } - } - } -} diff --git hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/lock/WriteLock.java hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/lock/WriteLock.java deleted file mode 100644 index 6838fe9..0000000 --- hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/lock/WriteLock.java +++ /dev/null @@ -1,303 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.hbase.snapshot.lock; - -import org.apache.zookeeper.KeeperException; -import org.apache.zookeeper.WatchedEvent; -import org.apache.zookeeper.Watcher; - -import static org.apache.zookeeper.CreateMode.EPHEMERAL_SEQUENTIAL; - -import org.apache.zookeeper.ZooKeeper; -import org.apache.zookeeper.data.ACL; -import org.apache.zookeeper.data.Stat; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.List; -import java.util.SortedSet; -import java.util.TreeSet; - -/** - * A protocol to implement an exclusive - * write lock or to elect a leader.

You invoke {@link #lock()} to - * start the process of grabbing the lock; you may get the lock then or it may be - * some time later.

You can register a listener so that you are invoked - * when you get the lock; otherwise you can ask if you have the lock - * by calling {@link #isOwner()} - * This class has been used as-is from the zookeeper 3.4.0 recipes. The only change - * made is a TODO for sorting using suffixes and the package name. - */ -public class WriteLock extends ProtocolSupport { - private static final Logger LOG = LoggerFactory.getLogger(WriteLock.class); - - private final String dir; - private String id; - private ZNodeName idName; - private String ownerId; - private String lastChildId; - private byte[] data = {0x12, 0x34}; - private LockListener callback; - private LockZooKeeperOperation zop; - - /** - * zookeeper contructor for writelock - * @param zookeeper zookeeper client instance - * @param dir the parent path you want to use for locking - * @param acl the acls that you want to use for all the paths, - * if null world read/write is used. - */ - public WriteLock(ZooKeeper zookeeper, String dir, List acl) { - super(zookeeper); - this.dir = dir; - if (acl != null) { - setAcl(acl); - } - this.zop = new LockZooKeeperOperation(); - } - - /** - * zookeeper contructor for writelock with callback - * @param zookeeper the zookeeper client instance - * @param dir the parent path you want to use for locking - * @param acl the acls that you want to use for all the paths - * @param callback the call back instance - */ - public WriteLock(ZooKeeper zookeeper, String dir, List acl, - LockListener callback) { - this(zookeeper, dir, acl); - this.callback = callback; - } - - /** - * return the current locklistener - * @return the locklistener - */ - public LockListener getLockListener() { - return this.callback; - } - - /** - * register a different call back listener - * @param callback the call back instance - */ - public void setLockListener(LockListener callback) { - this.callback = callback; - } - - /** - * Removes the lock or associated znode if - * you no longer require the lock. 
this also - * removes your request in the queue for locking - * in case you do not already hold the lock. - * @throws RuntimeException throws a runtime exception - * if it cannot connect to zookeeper. - */ - public synchronized void unlock() throws RuntimeException { - - if (!isClosed() && id != null) { - // we don't need to retry this operation in the case of failure - // as ZK will remove ephemeral files and we don't wanna hang - // this process when closing if we cannot reconnect to ZK - try { - - ZooKeeperOperation zopdel = new ZooKeeperOperation() { - public boolean execute() throws KeeperException, - InterruptedException { - zookeeper.delete(id, -1); - return Boolean.TRUE; - } - }; - zopdel.execute(); - } catch (InterruptedException e) { - LOG.warn("Caught: " + e, e); - //set that we have been interrupted. - Thread.currentThread().interrupt(); - } catch (KeeperException.NoNodeException e) { - // do nothing - } catch (KeeperException e) { - LOG.warn("Caught: " + e, e); - throw (RuntimeException) new RuntimeException(e.getMessage()). - initCause(e); - } finally { - if (callback != null) { - callback.lockReleased(); - } - id = null; - } - } - } - - /** - * the watcher called on - * getting watch while watching - * my predecessor - */ - private class LockWatcher implements Watcher { - public void process(WatchedEvent event) { - // lets either become the leader or watch the new/updated node - LOG.debug("Watcher fired on path: " + event.getPath() + " state: " + - event.getState() + " type " + event.getType()); - try { - lock(); - } catch (Exception e) { - LOG.warn("Failed to acquire lock: " + e, e); - } - } - } - - /** - * a zoookeeper operation that is mainly responsible - * for all the magic required for locking. 
- */ - private class LockZooKeeperOperation implements ZooKeeperOperation { - - /** find if we have been created earler if not create our node - * - * @param prefix the prefix node - * @param zookeeper teh zookeeper client - * @param dir the dir paretn - * @throws KeeperException - * @throws InterruptedException - */ - private void findPrefixInChildren(String prefix, ZooKeeper zookeeper, String dir) - throws KeeperException, InterruptedException { - List names = zookeeper.getChildren(dir, false); - for (String name : names) { - if (name.startsWith(prefix)) { - id = name; - if (LOG.isDebugEnabled()) { - LOG.debug("Found id created last time: " + id); - } - break; - } - } - if (id == null) { - id = zookeeper.create(dir + "/" + prefix, data, - getAcl(), EPHEMERAL_SEQUENTIAL); - - if (LOG.isDebugEnabled()) { - LOG.debug("Created id: " + id); - } - } - - } - - /** - * the command that is run and retried for actually - * obtaining the lock - * @return if the command was successful or not - */ - public boolean execute() throws KeeperException, InterruptedException { - do { - if (id == null) { - long sessionId = zookeeper.getSessionId(); - String prefix = "x-" + sessionId + "-"; - // lets try look up the current ID if we failed - // in the middle of creating the znode - findPrefixInChildren(prefix, zookeeper, dir); - idName = new ZNodeName(id); - } - if (id != null) { - List names = zookeeper.getChildren(dir, false); - if (names.isEmpty()) { - LOG.warn("No children in: " + dir + " when we've just " + - "created one! Lets recreate it..."); - // lets force the recreation of the id - id = null; - } else { - // lets sort them explicitly (though they do seem to come back in order ususally :) - SortedSet sortedNames = new TreeSet(); - for (String name : names) { - //TODO: Just use the suffix to sort. 
- sortedNames.add(new ZNodeName(dir + "/" + name)); - } - ownerId = sortedNames.first().getName(); - SortedSet lessThanMe = sortedNames.headSet(idName); - if (!lessThanMe.isEmpty()) { - ZNodeName lastChildName = lessThanMe.last(); - lastChildId = lastChildName.getName(); - if (LOG.isDebugEnabled()) { - LOG.debug("watching less than me node: " + lastChildId); - } - Stat stat = zookeeper.exists(lastChildId, new LockWatcher()); - if (stat != null) { - return Boolean.FALSE; - } else { - LOG.warn("Could not find the" + - " stats for less than me: " + lastChildName.getName()); - } - } else { - if (isOwner()) { - if (callback != null) { - callback.lockAcquired(); - } - return Boolean.TRUE; - } - } - } - } - } - while (id == null); - return Boolean.FALSE; - } - } - - ; - - /** - * Attempts to acquire the exclusive write lock returning whether or not it was - * acquired. Note that the exclusive lock may be acquired some time later after - * this method has been invoked due to the current lock owner going away. - */ - public synchronized boolean lock() throws KeeperException, InterruptedException { - if (isClosed()) { - return false; - } - ensurePathExists(dir); - - return (Boolean) retryOperation(zop); - } - - /** - * return the parent dir for lock - * @return the parent dir used for locks. 
- */ - public String getDir() { - return dir; - } - - /** - * Returns true if this node is the owner of the - * lock (or the leader) - */ - public boolean isOwner() { - return id != null && ownerId != null && id.equals(ownerId); - } - - /** - * return the id for this lock - * @return the id for this lock - */ - public String getId() { - return this.id; - } -} - diff --git hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/lock/ZNodeName.java hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/lock/ZNodeName.java deleted file mode 100644 index 51f0f18..0000000 --- hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/lock/ZNodeName.java +++ /dev/null @@ -1,113 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.hbase.snapshot.lock; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Represents an ephemeral znode name which has an ordered sequence number - * and can be sorted in order - * This class has been used as-is from the zookeeper 3.4.0 recipes with a - * change in package name. 
- */ -public class ZNodeName implements Comparable { - private final String name; - private String prefix; - private int sequence = -1; - private static final Logger LOG = LoggerFactory.getLogger(ZNodeName.class); - - public ZNodeName(String name) { - if (name == null) { - throw new NullPointerException("id cannot be null"); - } - this.name = name; - this.prefix = name; - int idx = name.lastIndexOf('-'); - if (idx >= 0) { - this.prefix = name.substring(0, idx); - try { - this.sequence = Integer.parseInt(name.substring(idx + 1)); - // If an exception occurred we misdetected a sequence suffix, - // so return -1. - } catch (NumberFormatException e) { - LOG.info("Number format exception for " + idx, e); - } catch (ArrayIndexOutOfBoundsException e) { - LOG.info("Array out of bounds for " + idx, e); - } - } - } - - @Override - public String toString() { - return name.toString(); - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - - ZNodeName sequence = (ZNodeName) o; - - if (!name.equals(sequence.name)) return false; - - return true; - } - - @Override - public int hashCode() { - return name.hashCode() + 37; - } - - public int compareTo(ZNodeName that) { - int answer = this.prefix.compareTo(that.prefix); - if (answer == 0) { - int s1 = this.sequence; - int s2 = that.sequence; - if (s1 == -1 && s2 == -1) { - return this.name.compareTo(that.name); - } - answer = s1 == -1 ? 1 : s2 == -1 ? 
-1 : s1 - s2; - } - return answer; - } - - /** - * Returns the name of the znode - */ - public String getName() { - return name; - } - - /** - * Returns the sequence number - */ - public int getZNodeName() { - return sequence; - } - - /** - * Returns the text prefix before the sequence number - */ - public String getPrefix() { - return prefix; - } -} diff --git hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/lock/ZooKeeperOperation.java hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/lock/ZooKeeperOperation.java deleted file mode 100644 index 9291125..0000000 --- hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/lock/ZooKeeperOperation.java +++ /dev/null @@ -1,41 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.hbase.snapshot.lock; - -import org.apache.zookeeper.KeeperException; - -/** - * A callback object which can be used for implementing retry-able operations in the - * {@link org.apache.hcatalog.hbase.snapshot.lock.ProtocolSupport} class - * This class has been used as-is from the zookeeper 3.4.0 with change in the - * package name . 
- */ -public interface ZooKeeperOperation { - - /** - * Performs the operation - which may be involved multiple times if the connection - * to ZooKeeper closes during this operation - * - * @return the result of the operation or null - * @throws KeeperException - * @throws InterruptedException - */ - public boolean execute() throws KeeperException, InterruptedException; -} diff --git hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/package-info.java hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/package-info.java deleted file mode 100644 index 822b2b6..0000000 --- hcatalog/storage-handlers/hbase/src/java/org/apache/hcatalog/hbase/snapshot/package-info.java +++ /dev/null @@ -1,28 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/** - * Provides a revision manager for data stored in HBase that can be used to implement repeatable reads. - * The component is designed to be usable for revision management of data stored in HBase in general, - * independent and not limited to HCatalog. It is used by the HCatalog HBase storage handler, implementation depends on HBase 0.92+. - *

- * For more information please see - * Snapshots and Repeatable reads for HBase Tables. - * @since 0.4 - */ -package org.apache.hcatalog.hbase.snapshot; diff --git hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/HBaseAuthorizationProvider.java hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/HBaseAuthorizationProvider.java new file mode 100644 index 0000000..0778b40 --- /dev/null +++ hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/HBaseAuthorizationProvider.java @@ -0,0 +1,144 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hive.hcatalog.hbase; + +import java.util.List; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.metastore.api.Database; +import org.apache.hadoop.hive.ql.metadata.AuthorizationException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.Partition; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.security.HiveAuthenticationProvider; +import org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider; +import org.apache.hadoop.hive.ql.security.authorization.Privilege; + +/** + * This class is an implementation of HiveAuthorizationProvider to provide + * authorization functionality for HBase tables. + */ +class HBaseAuthorizationProvider implements HiveAuthorizationProvider { + + @Override + public Configuration getConf() { + return null; + } + + @Override + public void setConf(Configuration conf) { + } + + /* + * (non-Javadoc) + * + * @see + * org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider + * #init(org.apache.hadoop.conf.Configuration) + */ + @Override + public void init(Configuration conf) throws HiveException { + } + + @Override + public HiveAuthenticationProvider getAuthenticator() { + return null; + } + + @Override + public void setAuthenticator(HiveAuthenticationProvider authenticator) { + } + + /* + * (non-Javadoc) + * + * @see + * org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider + * #authorize(org.apache.hadoop.hive.ql.security.authorization.Privilege[], + * org.apache.hadoop.hive.ql.security.authorization.Privilege[]) + */ + @Override + public void authorize(Privilege[] readRequiredPriv, + Privilege[] writeRequiredPriv) throws HiveException, + AuthorizationException { + } + + /* + * (non-Javadoc) + * + * @see + * org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider + * #authorize(org.apache.hadoop.hive.metastore.api.Database, + 
* org.apache.hadoop.hive.ql.security.authorization.Privilege[], + * org.apache.hadoop.hive.ql.security.authorization.Privilege[]) + */ + @Override + public void authorize(Database db, Privilege[] readRequiredPriv, + Privilege[] writeRequiredPriv) throws HiveException, + AuthorizationException { + } + + /* + * (non-Javadoc) + * + * @see + * org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider + * #authorize(org.apache.hadoop.hive.ql.metadata.Table, + * org.apache.hadoop.hive.ql.security.authorization.Privilege[], + * org.apache.hadoop.hive.ql.security.authorization.Privilege[]) + */ + @Override + public void authorize(Table table, Privilege[] readRequiredPriv, + Privilege[] writeRequiredPriv) throws HiveException, + AuthorizationException { + } + + /* + * (non-Javadoc) + * + * @see + * org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider + * #authorize(org.apache.hadoop.hive.ql.metadata.Partition, + * org.apache.hadoop.hive.ql.security.authorization.Privilege[], + * org.apache.hadoop.hive.ql.security.authorization.Privilege[]) + */ + @Override + public void authorize(Partition part, Privilege[] readRequiredPriv, + Privilege[] writeRequiredPriv) throws HiveException, + AuthorizationException { + } + + /* + * (non-Javadoc) + * + * @see + * org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider + * #authorize(org.apache.hadoop.hive.ql.metadata.Table, + * org.apache.hadoop.hive.ql.metadata.Partition, java.util.List, + * org.apache.hadoop.hive.ql.security.authorization.Privilege[], + * org.apache.hadoop.hive.ql.security.authorization.Privilege[]) + */ + @Override + public void authorize(Table table, Partition part, List columns, + Privilege[] readRequiredPriv, Privilege[] writeRequiredPriv) + throws HiveException, AuthorizationException { + } + +} diff --git hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/HBaseBaseOutputFormat.java 
hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/HBaseBaseOutputFormat.java new file mode 100644 index 0000000..895e100 --- /dev/null +++ hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/HBaseBaseOutputFormat.java @@ -0,0 +1,76 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hive.hcatalog.hbase; + +import java.io.IOException; +import java.util.Properties; + +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hive.ql.io.HiveOutputFormat; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.io.WritableComparable; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.OutputFormat; +import org.apache.hadoop.mapred.RecordWriter; +import org.apache.hadoop.util.Progressable; +import org.apache.hive.hcatalog.common.HCatConstants; +import org.apache.hive.hcatalog.common.HCatUtil; +import org.apache.hive.hcatalog.mapreduce.OutputJobInfo; + +public class HBaseBaseOutputFormat implements OutputFormat, Put>, + HiveOutputFormat, Put> { + + @Override + public org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter getHiveRecordWriter( + JobConf jc, Path finalOutPath, + Class valueClass, boolean isCompressed, + Properties tableProperties, Progressable progress) + throws IOException { + throw new UnsupportedOperationException("Not implemented"); + } + + @Override + public void checkOutputSpecs(FileSystem ignored, JobConf job) throws IOException { + OutputFormat, Put> outputFormat = getOutputFormat(job); + outputFormat.checkOutputSpecs(ignored, job); + } + + @Override + public RecordWriter, Put> getRecordWriter(FileSystem ignored, + JobConf job, String name, Progressable progress) throws IOException { + OutputFormat, Put> outputFormat = getOutputFormat(job); + return outputFormat.getRecordWriter(ignored, job, name, progress); + } + + private OutputFormat, Put> getOutputFormat(JobConf job) + throws IOException { + String outputInfo = job.get(HCatConstants.HCAT_KEY_OUTPUT_INFO); + OutputJobInfo outputJobInfo = (OutputJobInfo) HCatUtil.deserialize(outputInfo); + OutputFormat, Put> outputFormat = null; + if (HBaseHCatStorageHandler.isBulkMode(outputJobInfo)) { + outputFormat = new 
HBaseBulkOutputFormat(); + } else { + outputFormat = new HBaseDirectOutputFormat(); + } + return outputFormat; + } +} diff --git hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/HBaseBulkOutputFormat.java hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/HBaseBulkOutputFormat.java new file mode 100644 index 0000000..6ba7839 --- /dev/null +++ hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/HBaseBulkOutputFormat.java @@ -0,0 +1,221 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hive.hcatalog.hbase; + +import java.io.IOException; +import java.util.List; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.apache.hadoop.hbase.security.User; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.WritableComparable; +import org.apache.hadoop.mapred.FileOutputCommitter; +import org.apache.hadoop.mapred.FileOutputFormat; +import org.apache.hadoop.mapred.JobClient; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.JobContext; +import org.apache.hadoop.mapred.OutputCommitter; +import org.apache.hadoop.mapred.RecordWriter; +import org.apache.hadoop.mapred.Reporter; +import org.apache.hadoop.mapred.SequenceFileOutputFormat; +import org.apache.hadoop.mapred.TaskAttemptContext; +import org.apache.hadoop.util.Progressable; +import org.apache.hive.hcatalog.hbase.snapshot.RevisionManager; + +/** + * Class which imports data into HBase via it's "bulk load" feature. Wherein + * regions are created by the MR job using HFileOutputFormat and then later + * "moved" into the appropriate region server. 
+ */ +class HBaseBulkOutputFormat extends HBaseBaseOutputFormat { + + private final static ImmutableBytesWritable EMPTY_LIST = new ImmutableBytesWritable( + new byte[0]); + private SequenceFileOutputFormat, Put> baseOutputFormat; + + public HBaseBulkOutputFormat() { + baseOutputFormat = new SequenceFileOutputFormat, Put>(); + } + + @Override + public void checkOutputSpecs(FileSystem ignored, JobConf job) + throws IOException { + baseOutputFormat.checkOutputSpecs(ignored, job); + HBaseUtil.addHBaseDelegationToken(job); + addJTDelegationToken(job); + } + + @Override + public RecordWriter, Put> getRecordWriter( + FileSystem ignored, JobConf job, String name, Progressable progress) + throws IOException { + job.setOutputKeyClass(ImmutableBytesWritable.class); + job.setOutputValueClass(Put.class); + long version = HBaseRevisionManagerUtil.getOutputRevision(job); + return new HBaseBulkRecordWriter(baseOutputFormat.getRecordWriter( + ignored, job, name, progress), version); + } + + private void addJTDelegationToken(JobConf job) throws IOException { + // Get jobTracker delegation token if security is enabled + // we need to launch the ImportSequenceFile job + if (User.isSecurityEnabled()) { + JobClient jobClient = new JobClient(new JobConf(job)); + try { + job.getCredentials().addToken(new Text("my mr token"), + jobClient.getDelegationToken(null)); + } catch (InterruptedException e) { + throw new IOException("Error while getting JT delegation token", e); + } + } + } + + private static class HBaseBulkRecordWriter implements + RecordWriter, Put> { + + private RecordWriter, Put> baseWriter; + private final Long outputVersion; + + public HBaseBulkRecordWriter( + RecordWriter, Put> baseWriter, + Long outputVersion) { + this.baseWriter = baseWriter; + this.outputVersion = outputVersion; + } + + @Override + public void write(WritableComparable key, Put value) + throws IOException { + Put put = value; + if (outputVersion != null) { + put = new Put(value.getRow(), 
outputVersion.longValue()); + for (List row : value.getFamilyMap().values()) { + for (KeyValue el : row) { + put.add(el.getFamily(), el.getQualifier(), el.getValue()); + } + } + } + // we ignore the key + baseWriter.write(EMPTY_LIST, put); + } + + @Override + public void close(Reporter reporter) throws IOException { + baseWriter.close(reporter); + } + } + + public static class HBaseBulkOutputCommitter extends OutputCommitter { + + private final OutputCommitter baseOutputCommitter; + + public HBaseBulkOutputCommitter() { + baseOutputCommitter = new FileOutputCommitter(); + } + + @Override + public void abortTask(TaskAttemptContext taskContext) + throws IOException { + baseOutputCommitter.abortTask(taskContext); + } + + @Override + public void commitTask(TaskAttemptContext taskContext) + throws IOException { + // baseOutputCommitter.commitTask(taskContext); + } + + @Override + public boolean needsTaskCommit(TaskAttemptContext taskContext) + throws IOException { + return baseOutputCommitter.needsTaskCommit(taskContext); + } + + @Override + public void setupJob(JobContext jobContext) throws IOException { + baseOutputCommitter.setupJob(jobContext); + } + + @Override + public void setupTask(TaskAttemptContext taskContext) + throws IOException { + baseOutputCommitter.setupTask(taskContext); + } + + @Override + public void abortJob(JobContext jobContext, int status) + throws IOException { + baseOutputCommitter.abortJob(jobContext, status); + RevisionManager rm = null; + try { + rm = HBaseRevisionManagerUtil + .getOpenedRevisionManager(jobContext.getConfiguration()); + rm.abortWriteTransaction(HBaseRevisionManagerUtil + .getWriteTransaction(jobContext.getConfiguration())); + } finally { + cleanIntermediate(jobContext); + if (rm != null) + rm.close(); + } + } + + @Override + public void commitJob(JobContext jobContext) throws IOException { + baseOutputCommitter.commitJob(jobContext); + RevisionManager rm = null; + try { + Configuration conf = jobContext.getConfiguration(); + 
Path srcPath = FileOutputFormat.getOutputPath(jobContext.getJobConf()); + if (!FileSystem.get(conf).exists(srcPath)) { + throw new IOException("Failed to bulk import hfiles. " + + "Intermediate data directory is cleaned up or missing. " + + "Please look at the bulk import job if it exists for failure reason"); + } + Path destPath = new Path(srcPath.getParent(), srcPath.getName() + "_hfiles"); + boolean success = ImportSequenceFile.runJob(jobContext, + conf.get(HBaseConstants.PROPERTY_OUTPUT_TABLE_NAME_KEY), + srcPath, + destPath); + if (!success) { + cleanIntermediate(jobContext); + throw new IOException("Failed to bulk import hfiles." + + " Please look at the bulk import job for failure reason"); + } + rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(conf); + rm.commitWriteTransaction(HBaseRevisionManagerUtil.getWriteTransaction(conf)); + cleanIntermediate(jobContext); + } finally { + if (rm != null) + rm.close(); + } + } + + private void cleanIntermediate(JobContext jobContext) + throws IOException { + FileSystem fs = FileSystem.get(jobContext.getConfiguration()); + fs.delete(FileOutputFormat.getOutputPath(jobContext.getJobConf()), true); + } + } +} diff --git hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/HBaseConstants.java hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/HBaseConstants.java new file mode 100644 index 0000000..d966881 --- /dev/null +++ hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/HBaseConstants.java @@ -0,0 +1,41 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.hbase; + +import org.apache.hive.hcatalog.common.HCatConstants; + +/** + * Constants class for constants used in HBase storage handler. + */ +class HBaseConstants { + + /** key used to store write transaction object */ + public static final String PROPERTY_WRITE_TXN_KEY = HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX + ".hbase.mapreduce.writeTxn"; + + /** key used to define the name of the table to write to */ + public static final String PROPERTY_OUTPUT_TABLE_NAME_KEY = HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX + ".hbase.mapreduce.outputTableName"; + + /** key used to define whether bulk storage output format will be used or not */ + public static final String PROPERTY_BULK_OUTPUT_MODE_KEY = HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX + ".hbase.output.bulkMode"; + + /** key used to define the hbase table snapshot. */ + public static final String PROPERTY_TABLE_SNAPSHOT_KEY = HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX + "hbase.table.snapshot"; + +} diff --git hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/HBaseDirectOutputFormat.java hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/HBaseDirectOutputFormat.java new file mode 100644 index 0000000..797378b --- /dev/null +++ hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/HBaseDirectOutputFormat.java @@ -0,0 +1,167 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.hbase; + +import java.io.IOException; +import java.util.List; + +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.mapred.TableOutputFormat; +import org.apache.hadoop.io.WritableComparable; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.JobContext; +import org.apache.hadoop.mapred.OutputCommitter; +import org.apache.hadoop.mapred.RecordWriter; +import org.apache.hadoop.mapred.Reporter; +import org.apache.hadoop.mapred.TaskAttemptContext; +import org.apache.hadoop.util.Progressable; +import org.apache.hive.hcatalog.hbase.snapshot.RevisionManager; +import org.apache.hive.hcatalog.hbase.snapshot.Transaction; + +/** + * "Direct" implementation of OutputFormat for HBase. Uses HTable client's put + * API to write each row to HBase one a time. Presently it is just using + * TableOutputFormat as the underlying implementation in the future we can tune + * this to make the writes faster such as permanently disabling WAL, caching, + * etc. 
+ */ +class HBaseDirectOutputFormat extends HBaseBaseOutputFormat { + + private TableOutputFormat outputFormat; + + public HBaseDirectOutputFormat() { + this.outputFormat = new TableOutputFormat(); + } + + @Override + public RecordWriter, Put> getRecordWriter(FileSystem ignored, + JobConf job, String name, Progressable progress) + throws IOException { + long version = HBaseRevisionManagerUtil.getOutputRevision(job); + return new HBaseDirectRecordWriter(outputFormat.getRecordWriter(ignored, job, name, + progress), version); + } + + @Override + public void checkOutputSpecs(FileSystem ignored, JobConf job) + throws IOException { + outputFormat.checkOutputSpecs(ignored, job); + HBaseUtil.addHBaseDelegationToken(job); + } + + private static class HBaseDirectRecordWriter implements + RecordWriter, Put> { + + private RecordWriter, Put> baseWriter; + private final Long outputVersion; + + public HBaseDirectRecordWriter( + RecordWriter, Put> baseWriter, + Long outputVersion) { + this.baseWriter = baseWriter; + this.outputVersion = outputVersion; + } + + @Override + public void write(WritableComparable key, Put value) + throws IOException { + Put put = value; + if (outputVersion != null) { + put = new Put(value.getRow(), outputVersion.longValue()); + for (List row : value.getFamilyMap().values()) { + for (KeyValue el : row) { + put.add(el.getFamily(), el.getQualifier(), el.getValue()); + } + } + } + baseWriter.write(key, put); + } + + @Override + public void close(Reporter reporter) throws IOException { + baseWriter.close(reporter); + } + + } + + public static class HBaseDirectOutputCommitter extends OutputCommitter { + + public HBaseDirectOutputCommitter() throws IOException { + } + + @Override + public void abortTask(TaskAttemptContext taskContext) + throws IOException { + } + + @Override + public void commitTask(TaskAttemptContext taskContext) + throws IOException { + } + + @Override + public boolean needsTaskCommit(TaskAttemptContext taskContext) + throws IOException { + 
return false; + } + + @Override + public void setupJob(JobContext jobContext) throws IOException { + } + + @Override + public void setupTask(TaskAttemptContext taskContext) + throws IOException { + } + + @Override + public void abortJob(JobContext jobContext, int status) + throws IOException { + super.abortJob(jobContext, status); + RevisionManager rm = null; + try { + rm = HBaseRevisionManagerUtil + .getOpenedRevisionManager(jobContext.getConfiguration()); + Transaction writeTransaction = HBaseRevisionManagerUtil + .getWriteTransaction(jobContext.getConfiguration()); + rm.abortWriteTransaction(writeTransaction); + } finally { + if (rm != null) + rm.close(); + } + } + + @Override + public void commitJob(JobContext jobContext) throws IOException { + RevisionManager rm = null; + try { + rm = HBaseRevisionManagerUtil + .getOpenedRevisionManager(jobContext.getConfiguration()); + rm.commitWriteTransaction(HBaseRevisionManagerUtil.getWriteTransaction(jobContext + .getConfiguration())); + } finally { + if (rm != null) + rm.close(); + } + } + } +} diff --git hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/HBaseHCatStorageHandler.java hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/HBaseHCatStorageHandler.java new file mode 100644 index 0000000..d886a61 --- /dev/null +++ hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/HBaseHCatStorageHandler.java @@ -0,0 +1,610 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.hbase; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; + +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HBaseConfiguration; +import org.apache.hadoop.hbase.HColumnDescriptor; +import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.MasterNotRunningException; +import org.apache.hadoop.hbase.ZooKeeperConnectionException; +import org.apache.hadoop.hbase.client.HBaseAdmin; +import org.apache.hadoop.hbase.client.HTable; +import org.apache.hadoop.hbase.mapred.TableOutputFormat; +import org.apache.hadoop.hbase.mapreduce.TableInputFormat; +import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hive.hbase.HBaseSerDe; +import org.apache.hadoop.hive.metastore.HiveMetaHook; +import org.apache.hadoop.hive.metastore.MetaStoreUtils; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.TableDesc; +import org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider; +import org.apache.hadoop.hive.serde2.SerDe; +import org.apache.hadoop.mapred.InputFormat; +import org.apache.hadoop.mapred.JobConf; +import 
org.apache.hadoop.mapred.OutputFormat; +import org.apache.hadoop.util.StringUtils; +import org.apache.hive.hcatalog.common.HCatConstants; +import org.apache.hive.hcatalog.common.HCatUtil; +import org.apache.hive.hcatalog.data.schema.HCatSchema; +import org.apache.hive.hcatalog.hbase.HBaseBulkOutputFormat.HBaseBulkOutputCommitter; +import org.apache.hive.hcatalog.hbase.HBaseDirectOutputFormat.HBaseDirectOutputCommitter; +import org.apache.hive.hcatalog.hbase.snapshot.RevisionManager; +import org.apache.hive.hcatalog.hbase.snapshot.RevisionManagerConfiguration; +import org.apache.hive.hcatalog.hbase.snapshot.Transaction; +import org.apache.hive.hcatalog.mapreduce.HCatOutputFormat; +import org.apache.hive.hcatalog.mapreduce.HCatTableInfo; +import org.apache.hive.hcatalog.mapreduce.InputJobInfo; +import org.apache.hive.hcatalog.mapreduce.OutputJobInfo; +import org.apache.hive.hcatalog.mapreduce.HCatStorageHandler; +import org.apache.thrift.TBase; +import org.apache.zookeeper.ZooKeeper; + +import com.facebook.fb303.FacebookBase; +import com.google.common.util.concurrent.ThreadFactoryBuilder; + +/** + * This class HBaseHCatStorageHandler provides functionality to create HBase + * tables through HCatalog. The implementation is very similar to the + * HiveHBaseStorageHandler, with more details to suit HCatalog. 
+ */ +public class HBaseHCatStorageHandler extends HCatStorageHandler implements HiveMetaHook, Configurable { + + public final static String DEFAULT_PREFIX = "default."; + private final static String PROPERTY_INT_OUTPUT_LOCATION = "hcat.hbase.mapreduce.intermediateOutputLocation"; + + private Configuration hbaseConf; + private Configuration jobConf; + private HBaseAdmin admin; + + @Override + public void configureInputJobProperties(TableDesc tableDesc, Map jobProperties) { + // Populate jobProperties with input table name, table columns, RM snapshot, + // hbase-default.xml and hbase-site.xml + Map tableJobProperties = tableDesc.getJobProperties(); + String jobString = tableJobProperties.get(HCatConstants.HCAT_KEY_JOB_INFO); + try { + InputJobInfo inputJobInfo = (InputJobInfo) HCatUtil.deserialize(jobString); + HCatTableInfo tableInfo = inputJobInfo.getTableInfo(); + String qualifiedTableName = HBaseHCatStorageHandler.getFullyQualifiedHBaseTableName(tableInfo); + jobProperties.put(TableInputFormat.INPUT_TABLE, qualifiedTableName); + + Configuration jobConf = getJobConf(); + addResources(jobConf, jobProperties); + JobConf copyOfConf = new JobConf(jobConf); + HBaseConfiguration.addHbaseResources(copyOfConf); + //Getting hbase delegation token in getInputSplits does not work with PIG. 
So need to + //do it here + if (jobConf instanceof JobConf) { //Should be the case + HBaseUtil.addHBaseDelegationToken(copyOfConf); + ((JobConf) jobConf).getCredentials().addAll(copyOfConf.getCredentials()); + } + + String outputSchema = jobConf.get(HCatConstants.HCAT_KEY_OUTPUT_SCHEMA); + jobProperties.put(TableInputFormat.SCAN_COLUMNS, getScanColumns(tableInfo, outputSchema)); + + String serSnapshot = (String) inputJobInfo.getProperties().get( + HBaseConstants.PROPERTY_TABLE_SNAPSHOT_KEY); + if (serSnapshot == null) { + HCatTableSnapshot snapshot = + HBaseRevisionManagerUtil.createSnapshot( + RevisionManagerConfiguration.create(copyOfConf), + qualifiedTableName, tableInfo); + jobProperties.put(HBaseConstants.PROPERTY_TABLE_SNAPSHOT_KEY, + HCatUtil.serialize(snapshot)); + } + + //This adds it directly to the jobConf. Setting in jobProperties does not get propagated + //to JobConf as of now as the jobProperties is maintained per partition + //TODO: Remove when HCAT-308 is fixed + addOutputDependencyJars(jobConf); + jobProperties.put("tmpjars", jobConf.get("tmpjars")); + + } catch (IOException e) { + throw new IllegalStateException("Error while configuring job properties", e); + } + } + + @Override + public void configureOutputJobProperties(TableDesc tableDesc, Map jobProperties) { + // Populate jobProperties with output table name, hbase-default.xml, hbase-site.xml, OutputJobInfo + // Populate RM transaction in OutputJobInfo + // In case of bulk mode, populate intermediate output location + Map tableJobProperties = tableDesc.getJobProperties(); + String jobString = tableJobProperties.get(HCatConstants.HCAT_KEY_OUTPUT_INFO); + try { + OutputJobInfo outputJobInfo = (OutputJobInfo) HCatUtil.deserialize(jobString); + HCatTableInfo tableInfo = outputJobInfo.getTableInfo(); + String qualifiedTableName = HBaseHCatStorageHandler.getFullyQualifiedHBaseTableName(tableInfo); + jobProperties.put(HBaseConstants.PROPERTY_OUTPUT_TABLE_NAME_KEY, qualifiedTableName); + 
jobProperties.put(TableOutputFormat.OUTPUT_TABLE, qualifiedTableName); + + Configuration jobConf = getJobConf(); + addResources(jobConf, jobProperties); + + Configuration copyOfConf = new Configuration(jobConf); + HBaseConfiguration.addHbaseResources(copyOfConf); + + String txnString = outputJobInfo.getProperties().getProperty( + HBaseConstants.PROPERTY_WRITE_TXN_KEY); + Transaction txn = null; + if (txnString == null) { + txn = HBaseRevisionManagerUtil.beginWriteTransaction(qualifiedTableName, tableInfo, + RevisionManagerConfiguration.create(copyOfConf)); + String serializedTxn = HCatUtil.serialize(txn); + outputJobInfo.getProperties().setProperty(HBaseConstants.PROPERTY_WRITE_TXN_KEY, + serializedTxn); + } else { + txn = (Transaction) HCatUtil.deserialize(txnString); + } + if (isBulkMode(outputJobInfo)) { + String tableLocation = tableInfo.getTableLocation(); + String location = new Path(tableLocation, "REVISION_" + txn.getRevisionNumber()) + .toString(); + outputJobInfo.getProperties().setProperty(PROPERTY_INT_OUTPUT_LOCATION, location); + // We are writing out an intermediate sequenceFile hence + // location is not passed in OutputJobInfo.getLocation() + // TODO replace this with a mapreduce constant when available + jobProperties.put("mapred.output.dir", location); + jobProperties.put("mapred.output.committer.class", HBaseBulkOutputCommitter.class.getName()); + } else { + jobProperties.put("mapred.output.committer.class", HBaseDirectOutputCommitter.class.getName()); + } + + jobProperties.put(HCatConstants.HCAT_KEY_OUTPUT_INFO, HCatUtil.serialize(outputJobInfo)); + addOutputDependencyJars(jobConf); + jobProperties.put("tmpjars", jobConf.get("tmpjars")); + + } catch (IOException e) { + throw new IllegalStateException("Error while configuring job properties", e); + } + } + + /* + * @return instance of HiveAuthorizationProvider + * + * @throws HiveException + * + * @see org.apache.hive.hcatalog.storagehandler.HCatStorageHandler# + * getAuthorizationProvider() + */ 
+ @Override + public HiveAuthorizationProvider getAuthorizationProvider() + throws HiveException { + + HBaseAuthorizationProvider hbaseAuth = new HBaseAuthorizationProvider(); + hbaseAuth.init(getConf()); + return hbaseAuth; + } + + /* + * @param table + * + * @throws MetaException + * + * @see org.apache.hive.hcatalog.storagehandler.HCatStorageHandler + * #commitCreateTable(org.apache.hadoop.hive.metastore.api.Table) + */ + @Override + public void commitCreateTable(Table table) throws MetaException { + } + + /* + * @param instance of table + * + * @param deleteData + * + * @throws MetaException + * + * @see org.apache.hive.hcatalog.storagehandler.HCatStorageHandler + * #commitDropTable(org.apache.hadoop.hive.metastore.api.Table, boolean) + */ + @Override + public void commitDropTable(Table tbl, boolean deleteData) + throws MetaException { + checkDeleteTable(tbl); + + } + + /* + * @param instance of table + * + * @throws MetaException + * + * @see org.apache.hive.hcatalog.storagehandler.HCatStorageHandler + * #preCreateTable(org.apache.hadoop.hive.metastore.api.Table) + */ + @Override + public void preCreateTable(Table tbl) throws MetaException { + boolean isExternal = MetaStoreUtils.isExternalTable(tbl); + + hbaseConf = getConf(); + + if (tbl.getSd().getLocation() != null) { + throw new MetaException("LOCATION may not be specified for HBase."); + } + + try { + String tableName = getFullyQualifiedHBaseTableName(tbl); + String hbaseColumnsMapping = tbl.getParameters().get( + HBaseSerDe.HBASE_COLUMNS_MAPPING); + + if (hbaseColumnsMapping == null) { + throw new MetaException( + "No hbase.columns.mapping defined in table" + + " properties."); + } + + List hbaseColumnFamilies = new ArrayList(); + List hbaseColumnQualifiers = new ArrayList(); + List hbaseColumnFamiliesBytes = new ArrayList(); + int iKey = HBaseUtil.parseColumnMapping(hbaseColumnsMapping, + hbaseColumnFamilies, hbaseColumnFamiliesBytes, + hbaseColumnQualifiers, null); + + HTableDescriptor tableDesc; + Set 
uniqueColumnFamilies = new HashSet(); + if (!getHBaseAdmin().tableExists(tableName)) { + // if it is not an external table then create one + if (!isExternal) { + // Create the column descriptors + tableDesc = new HTableDescriptor(tableName); + uniqueColumnFamilies.addAll(hbaseColumnFamilies); + uniqueColumnFamilies.remove(hbaseColumnFamilies.get(iKey)); + + for (String columnFamily : uniqueColumnFamilies) { + HColumnDescriptor familyDesc = new HColumnDescriptor(Bytes + .toBytes(columnFamily)); + familyDesc.setMaxVersions(Integer.MAX_VALUE); + tableDesc.addFamily(familyDesc); + } + + getHBaseAdmin().createTable(tableDesc); + } else { + // an external table + throw new MetaException("HBase table " + tableName + + " doesn't exist while the table is " + + "declared as an external table."); + } + + } else { + if (!isExternal) { + throw new MetaException("Table " + tableName + + " already exists within HBase." + + " Use CREATE EXTERNAL TABLE instead to" + + " register it in HCatalog."); + } + // make sure the schema mapping is right + tableDesc = getHBaseAdmin().getTableDescriptor( + Bytes.toBytes(tableName)); + + for (int i = 0; i < hbaseColumnFamilies.size(); i++) { + if (i == iKey) { + continue; + } + + if (!tableDesc.hasFamily(hbaseColumnFamiliesBytes.get(i))) { + throw new MetaException("Column Family " + + hbaseColumnFamilies.get(i) + + " is not defined in hbase table " + tableName); + } + } + } + + // ensure the table is online + new HTable(hbaseConf, tableDesc.getName()); + + //Set up table in revision manager. 
+ RevisionManager rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(hbaseConf); + rm.createTable(tableName, new ArrayList(uniqueColumnFamilies)); + + } catch (MasterNotRunningException mnre) { + throw new MetaException(StringUtils.stringifyException(mnre)); + } catch (IOException ie) { + throw new MetaException(StringUtils.stringifyException(ie)); + } catch (IllegalArgumentException iae) { + throw new MetaException(StringUtils.stringifyException(iae)); + } + + } + + /* + * @param table + * + * @throws MetaException + * + * @see org.apache.hive.hcatalog.storagehandler.HCatStorageHandler + * #preDropTable(org.apache.hadoop.hive.metastore.api.Table) + */ + @Override + public void preDropTable(Table table) throws MetaException { + } + + /* + * @param table + * + * @throws MetaException + * + * @see org.apache.hive.hcatalog.storagehandler.HCatStorageHandler + * #rollbackCreateTable(org.apache.hadoop.hive.metastore.api.Table) + */ + @Override + public void rollbackCreateTable(Table table) throws MetaException { + checkDeleteTable(table); + } + + /* + * @param table + * + * @throws MetaException + * + * @see org.apache.hive.hcatalog.storagehandler.HCatStorageHandler + * #rollbackDropTable(org.apache.hadoop.hive.metastore.api.Table) + */ + @Override + public void rollbackDropTable(Table table) throws MetaException { + } + + /* + * @return instance of HiveMetaHook + * + * @see org.apache.hive.hcatalog.storagehandler.HCatStorageHandler#getMetaHook() + */ + @Override + public HiveMetaHook getMetaHook() { + return this; + } + + private HBaseAdmin getHBaseAdmin() throws MetaException { + try { + if (admin == null) { + admin = new HBaseAdmin(this.getConf()); + } + return admin; + } catch (MasterNotRunningException mnre) { + throw new MetaException(StringUtils.stringifyException(mnre)); + } catch (ZooKeeperConnectionException zkce) { + throw new MetaException(StringUtils.stringifyException(zkce)); + } + } + + private String getFullyQualifiedHBaseTableName(Table tbl) { + 
String tableName = tbl.getParameters().get(HBaseSerDe.HBASE_TABLE_NAME); + if (tableName == null) { + tableName = tbl.getSd().getSerdeInfo().getParameters() + .get(HBaseSerDe.HBASE_TABLE_NAME); + } + if (tableName == null) { + if (tbl.getDbName().equals(MetaStoreUtils.DEFAULT_DATABASE_NAME)) { + tableName = tbl.getTableName(); + } else { + tableName = tbl.getDbName() + "." + tbl.getTableName(); + } + tableName = tableName.toLowerCase(); + } + return tableName; + } + + static String getFullyQualifiedHBaseTableName(HCatTableInfo tableInfo) { + String qualifiedName = tableInfo.getStorerInfo().getProperties() + .getProperty(HBaseSerDe.HBASE_TABLE_NAME); + if (qualifiedName == null) { + String databaseName = tableInfo.getDatabaseName(); + String tableName = tableInfo.getTableName(); + if ((databaseName == null) + || (databaseName.equals(MetaStoreUtils.DEFAULT_DATABASE_NAME))) { + qualifiedName = tableName; + } else { + qualifiedName = databaseName + "." + tableName; + } + qualifiedName = qualifiedName.toLowerCase(); + } + return qualifiedName; + } + + @Override + public Class getInputFormatClass() { + return HBaseInputFormat.class; + } + + @Override + public Class getOutputFormatClass() { + return HBaseBaseOutputFormat.class; + } + + /* + * @return subclass of SerDe + * + * @throws UnsupportedOperationException + * + * @see + * org.apache.hive.hcatalog.storagehandler.HCatStorageHandler#getSerDeClass() + */ + @Override + public Class getSerDeClass() + throws UnsupportedOperationException { + return HBaseSerDe.class; + } + + public Configuration getJobConf() { + return jobConf; + } + + @Override + public Configuration getConf() { + + if (hbaseConf == null) { + hbaseConf = HBaseConfiguration.create(); + } + return hbaseConf; + } + + @Override + public void setConf(Configuration conf) { + //setConf is called both during DDL operations and mapred read/write jobs. + //Creating a copy of conf for DDL and adding hbase-default and hbase-site.xml to it. 
+ //For jobs, maintaining a reference instead of cloning as we need to + // 1) add hbase delegation token to the Credentials. + // 2) set tmpjars on it. Putting in jobProperties does not get propagated to JobConf + // in case of InputFormat as they are maintained per partition. + //Not adding hbase-default.xml and hbase-site.xml to jobConf as it will override any + //hbase properties set in the JobConf by the user. In configureInputJobProperties and + //configureOutputJobProperties, we take care of adding the default properties + //that are not already present. TODO: Change to a copy for jobs after HCAT-308 is fixed. + jobConf = conf; + hbaseConf = RevisionManagerConfiguration.create(HBaseConfiguration.create(conf)); + } + + private void checkDeleteTable(Table table) throws MetaException { + boolean isExternal = MetaStoreUtils.isExternalTable(table); + String tableName = getFullyQualifiedHBaseTableName(table); + RevisionManager rm = null; + try { + if (!isExternal && getHBaseAdmin().tableExists(tableName)) { + // we have created an HBase table, so we delete it to roll back; + if (getHBaseAdmin().isTableEnabled(tableName)) { + getHBaseAdmin().disableTable(tableName); + } + getHBaseAdmin().deleteTable(tableName); + + //Drop table in revision manager. + rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(hbaseConf); + rm.dropTable(tableName); + } + } catch (IOException ie) { + throw new MetaException(StringUtils.stringifyException(ie)); + } finally { + HBaseRevisionManagerUtil.closeRevisionManagerQuietly(rm); + } + } + + /** + * Helper method for users to add the required depedency jars to distributed cache. 
+ * @param conf + * @throws IOException + */ + private void addOutputDependencyJars(Configuration conf) throws IOException { + TableMapReduceUtil.addDependencyJars(conf, + //ZK + ZooKeeper.class, + //HBase + HTable.class, + //Hive + HiveException.class, + //HCatalog jar + HCatOutputFormat.class, + //hcat hbase storage handler jar + HBaseHCatStorageHandler.class, + //hive hbase storage handler jar + HBaseSerDe.class, + //hive jar + Table.class, + //libthrift jar + TBase.class, + //hbase jar + Bytes.class, + //thrift-fb303 .jar + FacebookBase.class, + //guava jar + ThreadFactoryBuilder.class); + } + + /** + * Utility method to add hbase-default.xml and hbase-site.xml properties to a new map + * if they are not already present in the jobConf. + * @param jobConf Job configuration + * @param newJobProperties Map to which new properties should be added + */ + private void addResources(Configuration jobConf, + Map newJobProperties) { + Configuration conf = new Configuration(false); + HBaseConfiguration.addHbaseResources(conf); + RevisionManagerConfiguration.addResources(conf); + for (Entry entry : conf) { + if (jobConf.get(entry.getKey()) == null) + newJobProperties.put(entry.getKey(), entry.getValue()); + } + } + + public static boolean isBulkMode(OutputJobInfo outputJobInfo) { + //Default is false + String bulkMode = outputJobInfo.getTableInfo().getStorerInfo().getProperties() + .getProperty(HBaseConstants.PROPERTY_BULK_OUTPUT_MODE_KEY, + "false"); + return "true".equals(bulkMode); + } + + private String getScanColumns(HCatTableInfo tableInfo, String outputColSchema) throws IOException { + StringBuilder builder = new StringBuilder(); + String hbaseColumnMapping = tableInfo.getStorerInfo().getProperties() + .getProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING); + if (outputColSchema == null) { + String[] splits = hbaseColumnMapping.split("[,]"); + for (int i = 0; i < splits.length; i++) { + if (!splits[i].equals(HBaseSerDe.HBASE_KEY_COL)) + builder.append(splits[i]).append(" 
"); + } + } else { + HCatSchema outputSchema = (HCatSchema) HCatUtil.deserialize(outputColSchema); + HCatSchema tableSchema = tableInfo.getDataColumns(); + List outputFieldNames = outputSchema.getFieldNames(); + List outputColumnMapping = new ArrayList(); + for (String fieldName : outputFieldNames) { + int position = tableSchema.getPosition(fieldName); + outputColumnMapping.add(position); + } + List columnFamilies = new ArrayList(); + List columnQualifiers = new ArrayList(); + HBaseUtil.parseColumnMapping(hbaseColumnMapping, columnFamilies, null, + columnQualifiers, null); + for (int i = 0; i < outputColumnMapping.size(); i++) { + int cfIndex = outputColumnMapping.get(i); + String cf = columnFamilies.get(cfIndex); + // We skip the key column. Family and qualifier are both looked up by table column position (cfIndex). + if (cf.equals(HBaseSerDe.HBASE_KEY_COL) == false) { + String qualifier = columnQualifiers.get(cfIndex); + builder.append(cf); + builder.append(":"); + if (qualifier != null) { + builder.append(qualifier); + } + builder.append(" "); + } + } + } + //Remove the extra space delimiter + builder.deleteCharAt(builder.length() - 1); + return builder.toString(); + } + +} diff --git hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/HBaseInputFormat.java hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/HBaseInputFormat.java new file mode 100644 index 0000000..79959cc --- /dev/null +++ hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/HBaseInputFormat.java @@ -0,0 +1,126 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.hbase; + +import java.io.IOException; +import java.util.List; + +import org.apache.hadoop.hbase.client.HTable; +import org.apache.hadoop.hbase.client.Result; +import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.apache.hadoop.hbase.mapred.TableSplit; +import org.apache.hadoop.hbase.mapreduce.TableInputFormat; +import org.apache.hive.hcatalog.mapreduce.HCatMapRedUtil; +import org.apache.hadoop.mapred.InputFormat; +import org.apache.hadoop.mapred.InputSplit; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.RecordReader; +import org.apache.hadoop.mapred.Reporter; +import org.apache.hive.hcatalog.common.HCatConstants; +import org.apache.hive.hcatalog.common.HCatUtil; +import org.apache.hive.hcatalog.mapreduce.InputJobInfo; + +/** + * This class HBaseInputFormat is a wrapper class of TableInputFormat in HBase. 
+ */ +class HBaseInputFormat implements InputFormat { + + private final TableInputFormat inputFormat; + + public HBaseInputFormat() { + inputFormat = new TableInputFormat(); + } + + /* + * @param split the split to read; it is cast to the mapred TableSplit + * + * @param job configuration holding the serialized InputJobInfo and the input table name + * + * @param reporter not used by this method + * + * @return an initialized HbaseSnapshotRecordReader limited to the split's row range + * + * @throws IOException + * + * @see + * org.apache.hadoop.mapred.InputFormat#getRecordReader(org.apache + * .hadoop.mapred.InputSplit, org.apache.hadoop.mapred.JobConf, + * org.apache.hadoop.mapred.Reporter) + */ + @Override + public RecordReader getRecordReader( + InputSplit split, JobConf job, Reporter reporter) + throws IOException { + String jobString = job.get(HCatConstants.HCAT_KEY_JOB_INFO); + InputJobInfo inputJobInfo = (InputJobInfo) HCatUtil.deserialize(jobString); + + String tableName = job.get(TableInputFormat.INPUT_TABLE); + TableSplit tSplit = (TableSplit) split; + HbaseSnapshotRecordReader recordReader = new HbaseSnapshotRecordReader(inputJobInfo, job); + inputFormat.setConf(job); + Scan inputScan = inputFormat.getScan(); + // TODO: Make the caching configurable by the user + inputScan.setCaching(200); + inputScan.setCacheBlocks(false); + Scan sc = new Scan(inputScan); + sc.setStartRow(tSplit.getStartRow()); + sc.setStopRow(tSplit.getEndRow()); + recordReader.setScan(sc); + recordReader.setHTable(new HTable(job, tableName)); + recordReader.init(); + return recordReader; + } + + /* + * @param job job configuration handed to the wrapped TableInputFormat + * + * @param numSplits ignored; one split is returned per split of the wrapped TableInputFormat + * + * @return the wrapped format's splits converted to mapred TableSplit instances + * + * @throws IOException + * + * @see + * org.apache.hadoop.mapred.InputFormat#getSplits(org.apache.hadoop.mapred + * .JobConf, int) + */ + @Override + public org.apache.hadoop.mapred.InputSplit[] getSplits(JobConf job, int numSplits) + throws IOException { + inputFormat.setConf(job); + return convertSplits(inputFormat.getSplits(HCatMapRedUtil.createJobContext(job, null, + Reporter.NULL))); + } + + private InputSplit[] convertSplits(List splits) { + 
InputSplit[] converted = new InputSplit[splits.size()]; + for (int i = 0; i < splits.size(); i++) { + org.apache.hadoop.hbase.mapreduce.TableSplit tableSplit = + (org.apache.hadoop.hbase.mapreduce.TableSplit) splits.get(i); + TableSplit newTableSplit = new TableSplit(tableSplit.getTableName(), + tableSplit.getStartRow(), + tableSplit.getEndRow(), tableSplit.getRegionLocation()); + converted[i] = newTableSplit; + } + return converted; + } + +} diff --git hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/HBaseRevisionManagerUtil.java hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/HBaseRevisionManagerUtil.java new file mode 100644 index 0000000..979d4ce --- /dev/null +++ hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/HBaseRevisionManagerUtil.java @@ -0,0 +1,257 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hive.hcatalog.hbase; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.hbase.HBaseSerDe; +import org.apache.hive.hcatalog.common.HCatConstants; +import org.apache.hive.hcatalog.common.HCatUtil; +import org.apache.hive.hcatalog.data.schema.HCatFieldSchema; +import org.apache.hive.hcatalog.data.schema.HCatSchema; +import org.apache.hive.hcatalog.hbase.snapshot.RevisionManager; +import org.apache.hive.hcatalog.hbase.snapshot.RevisionManagerFactory; +import org.apache.hive.hcatalog.hbase.snapshot.TableSnapshot; +import org.apache.hive.hcatalog.hbase.snapshot.Transaction; +import org.apache.hive.hcatalog.mapreduce.HCatTableInfo; +import org.apache.hive.hcatalog.mapreduce.InputJobInfo; +import org.apache.hive.hcatalog.mapreduce.OutputJobInfo; +import org.apache.hive.hcatalog.mapreduce.StorerInfo; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +/** + * The Class HBaseRevisionManagerUtil has utility methods to interact with Revision Manager. + * It cannot be instantiated; all of its methods are static. + */ +class HBaseRevisionManagerUtil { + + private final static Logger LOG = LoggerFactory.getLogger(HBaseRevisionManagerUtil.class); + + private HBaseRevisionManagerUtil() { + } + + /** + * Creates the latest snapshot of the table. + * + * @param jobConf The job configuration. + * @param hbaseTableName The fully qualified name of the HBase table. + * @param tableInfo HCat table information + * @return An instance of HCatTableSnapshot + * @throws IOException Signals that an I/O exception has occurred. 
+ */ + static HCatTableSnapshot createSnapshot(Configuration jobConf, + String hbaseTableName, HCatTableInfo tableInfo) throws IOException { + + RevisionManager rm = null; + TableSnapshot snpt; + try { + rm = getOpenedRevisionManager(jobConf); + snpt = rm.createSnapshot(hbaseTableName); + } finally { + closeRevisionManagerQuietly(rm); + } + + HCatTableSnapshot hcatSnapshot = HBaseRevisionManagerUtil.convertSnapshot(snpt, tableInfo); + return hcatSnapshot; + } + + /** + * Creates the snapshot using the revision specified by the user. + * + * @param jobConf The job configuration. + * @param tableName The fully qualified name of the table whose snapshot is being taken. + * @param revision The revision number to use for the snapshot. + * @return An instance of HCatTableSnapshot. + * @throws IOException If the job configuration holds no serialized InputJobInfo, or on another I/O failure. + */ + static HCatTableSnapshot createSnapshot(Configuration jobConf, + String tableName, long revision) + throws IOException { + + TableSnapshot snpt; + RevisionManager rm = null; + try { + rm = getOpenedRevisionManager(jobConf); + snpt = rm.createSnapshot(tableName, revision); + } finally { + closeRevisionManagerQuietly(rm); + } + + String inputJobString = jobConf.get(HCatConstants.HCAT_KEY_JOB_INFO); + if (inputJobString == null) { + throw new IOException( + "InputJobInfo information not found in JobContext. " + + "HCatInputFormat.setInput() not called?"); + } + InputJobInfo inputInfo = (InputJobInfo) HCatUtil.deserialize(inputJobString); + HCatTableSnapshot hcatSnapshot = HBaseRevisionManagerUtil + .convertSnapshot(snpt, inputInfo.getTableInfo()); + + return hcatSnapshot; + } + + /** + * Gets an instance of revision manager which is opened. + * + * @param jobConf The job configuration. + * @return RevisionManager An instance of revision manager. 
+ * @throws IOException if RevisionManagerFactory.getOpenedRevisionManager fails + */ + static RevisionManager getOpenedRevisionManager(Configuration jobConf) throws IOException { + return RevisionManagerFactory.getOpenedRevisionManager(jobConf); + } + + static void closeRevisionManagerQuietly(RevisionManager rm) { + if (rm != null) { + try { + rm.close(); + } catch (IOException e) { + LOG.warn("Error while trying to close revision manager", e); + } + } + } + + + static HCatTableSnapshot convertSnapshot(TableSnapshot hbaseSnapshot, + HCatTableInfo hcatTableInfo) throws IOException { + // Re-keys the per-family revisions of hbaseSnapshot by HCat column name. + HCatSchema hcatTableSchema = hcatTableInfo.getDataColumns(); + Map hcatHbaseColMap = getHCatHBaseColumnMapping(hcatTableInfo); + HashMap revisionMap = new HashMap(); + + for (HCatFieldSchema fSchema : hcatTableSchema.getFields()) { + if (hcatHbaseColMap.containsKey(fSchema.getName())) { + String colFamily = hcatHbaseColMap.get(fSchema.getName()); + long revisionID = hbaseSnapshot.getRevision(colFamily); + revisionMap.put(fSchema.getName(), revisionID); + } + } + + HCatTableSnapshot hcatSnapshot = new HCatTableSnapshot( + hcatTableInfo.getDatabaseName(), hcatTableInfo.getTableName(), revisionMap, hbaseSnapshot.getLatestRevision()); + return hcatSnapshot; + } + + static TableSnapshot convertSnapshot(HCatTableSnapshot hcatSnapshot, + HCatTableInfo hcatTableInfo) throws IOException { + // Re-keys the per-column revisions of hcatSnapshot by HBase column family. + HCatSchema hcatTableSchema = hcatTableInfo.getDataColumns(); + Map revisionMap = new HashMap(); + Map hcatHbaseColMap = getHCatHBaseColumnMapping(hcatTableInfo); + for (HCatFieldSchema fSchema : hcatTableSchema.getFields()) { + String colFamily = hcatHbaseColMap.get(fSchema.getName()); + if (hcatSnapshot.containsColumn(fSchema.getName())) { + long revision = hcatSnapshot.getRevision(fSchema.getName()); + revisionMap.put(colFamily, revision); + } + } + + String fullyQualifiedName = hcatSnapshot.getDatabaseName() + "." 
+ + hcatSnapshot.getTableName(); + return new TableSnapshot(fullyQualifiedName, revisionMap, hcatSnapshot.getLatestRevision()); + + } + + /** + * Begins a transaction in the revision manager for the given table. + * @param qualifiedTableName Name of the table + * @param tableInfo HCat Table information + * @param jobConf Job Configuration + * @return The new transaction in revision manager + * @throws IOException if the revision manager cannot be opened or cannot begin the transaction + */ + static Transaction beginWriteTransaction(String qualifiedTableName, + HCatTableInfo tableInfo, Configuration jobConf) throws IOException { + Transaction txn; + RevisionManager rm = null; + try { + rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(jobConf); + String hBaseColumns = tableInfo.getStorerInfo().getProperties() + .getProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING); + String[] splits = hBaseColumns.split("[,:]"); + Set families = new HashSet(); + for (int i = 0; i < splits.length; i += 2) { + if (!splits[i].isEmpty()) + families.add(splits[i]); + } + txn = rm.beginWriteTransaction(qualifiedTableName, new ArrayList(families)); + } finally { + HBaseRevisionManagerUtil.closeRevisionManagerQuietly(rm); + } + return txn; + } + + static Transaction getWriteTransaction(Configuration conf) throws IOException { + OutputJobInfo outputJobInfo = (OutputJobInfo) HCatUtil.deserialize(conf.get(HCatConstants.HCAT_KEY_OUTPUT_INFO)); + return (Transaction) HCatUtil.deserialize(outputJobInfo.getProperties() + .getProperty(HBaseConstants.PROPERTY_WRITE_TXN_KEY)); + } + + static void setWriteTransaction(Configuration conf, Transaction txn) throws IOException { + OutputJobInfo outputJobInfo = (OutputJobInfo) HCatUtil.deserialize(conf.get(HCatConstants.HCAT_KEY_OUTPUT_INFO)); + outputJobInfo.getProperties().setProperty(HBaseConstants.PROPERTY_WRITE_TXN_KEY, HCatUtil.serialize(txn)); + conf.set(HCatConstants.HCAT_KEY_OUTPUT_INFO, HCatUtil.serialize(outputJobInfo)); + } + + /** + * Get the Revision number that will be assigned to this job's output data + * @param 
conf configuration of the job + * @return the revision number used + * @throws IOException if the write transaction cannot be read back from the configuration + */ + static long getOutputRevision(Configuration conf) throws IOException { + return getWriteTransaction(conf).getRevisionNumber(); + } + + private static Map getHCatHBaseColumnMapping(HCatTableInfo hcatTableInfo) + throws IOException { + // Maps each HCat data column name to its HBase column family, leaving out the row key column. + HCatSchema hcatTableSchema = hcatTableInfo.getDataColumns(); + StorerInfo storeInfo = hcatTableInfo.getStorerInfo(); + String hbaseColumnMapping = storeInfo.getProperties().getProperty( + HBaseSerDe.HBASE_COLUMNS_MAPPING); + + Map hcatHbaseColMap = new HashMap(); + List columnFamilies = new ArrayList(); + List columnQualifiers = new ArrayList(); + HBaseUtil.parseColumnMapping(hbaseColumnMapping, columnFamilies, + null, columnQualifiers, null); + + for (HCatFieldSchema column : hcatTableSchema.getFields()) { + int fieldPos = hcatTableSchema.getPosition(column.getName()); + String colFamily = columnFamilies.get(fieldPos); + if (colFamily.equals(HBaseSerDe.HBASE_KEY_COL) == false) { + hcatHbaseColMap.put(column.getName(), colFamily); + } + } + + return hcatHbaseColMap; + } + +} diff --git hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/HBaseUtil.java hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/HBaseUtil.java new file mode 100644 index 0000000..0651bc6 --- /dev/null +++ hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/HBaseUtil.java @@ -0,0 +1,159 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.hbase; + +import java.io.IOException; +import java.util.List; + +import org.apache.hadoop.hbase.security.User; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hive.hbase.HBaseSerDe; +import org.apache.hadoop.mapred.JobConf; + +class HBaseUtil { + + private HBaseUtil() { + } + + /** + * Parses the HBase columns mapping to identify the column families, qualifiers + * and also caches the byte arrays corresponding to them. One of the HCat table + * columns maps to the HBase row key, by default the first column. + * + * @param columnMapping - the column mapping specification to be parsed + * @param colFamilies - output list, cleared and then filled with the column family names (must not be null) + * @param colFamiliesBytes - optional output list of the family names as byte arrays (may be null) + * @param colQualifiers - output list, cleared and then filled with the qualifier names, null where an entry has none (must not be null) + * @param colQualifiersBytes - optional output list of the qualifier names as byte arrays (may be null) + * @return the row key index in the column names list (0 if the mapping names no key column) + * @throws IOException if the family and qualifier lists, or their byte array copies, differ in size + */ + static int parseColumnMapping( + String columnMapping, + List colFamilies, + List colFamiliesBytes, + List colQualifiers, + List colQualifiersBytes) throws IOException { + + int rowKeyIndex = -1; + + if (colFamilies == null || colQualifiers == null) { + throw new IllegalArgumentException("Error: caller must pass in lists for the column families " + + "and qualifiers."); + } + + colFamilies.clear(); + colQualifiers.clear(); + + if (columnMapping == null) { + throw new IllegalArgumentException("Error: hbase.columns.mapping missing for this HBase table."); + } + + if 
(columnMapping.equals("") || columnMapping.equals(HBaseSerDe.HBASE_KEY_COL)) { + throw new IllegalArgumentException("Error: hbase.columns.mapping specifies only the HBase table" + + " row key. A valid Hive-HBase table must specify at least one additional column."); + } + + String[] mapping = columnMapping.split(","); + + for (int i = 0; i < mapping.length; i++) { + String elem = mapping[i]; + int idxFirst = elem.indexOf(":"); + int idxLast = elem.lastIndexOf(":"); + // Each entry must contain exactly one ':' separator. + if (idxFirst < 0 || !(idxFirst == idxLast)) { + throw new IllegalArgumentException("Error: the HBase columns mapping contains a badly formed " + + "column family, column qualifier specification."); + } + + if (elem.equals(HBaseSerDe.HBASE_KEY_COL)) { + rowKeyIndex = i; + colFamilies.add(elem); + colQualifiers.add(null); + } else { + String[] parts = elem.split(":"); + assert (parts.length > 0 && parts.length <= 2); + colFamilies.add(parts[0]); + + if (parts.length == 2) { + colQualifiers.add(parts[1]); + } else { + colQualifiers.add(null); + } + } + } + // No entry named the row key: insert the key column at position 0. + if (rowKeyIndex == -1) { + colFamilies.add(0, HBaseSerDe.HBASE_KEY_COL); + colQualifiers.add(0, null); + rowKeyIndex = 0; + } + + if (colFamilies.size() != colQualifiers.size()) { + throw new IOException("Error in parsing the hbase columns mapping."); + } + + // populate the corresponding byte [] if the client has passed in a non-null list + if (colFamiliesBytes != null) { + colFamiliesBytes.clear(); + + for (String fam : colFamilies) { + colFamiliesBytes.add(Bytes.toBytes(fam)); + } + } + + if (colQualifiersBytes != null) { + colQualifiersBytes.clear(); + + for (String qual : colQualifiers) { + if (qual == null) { + colQualifiersBytes.add(null); + } else { + colQualifiersBytes.add(Bytes.toBytes(qual)); + } + } + } + + if (colFamiliesBytes != null && colQualifiersBytes != null) { + if (colFamiliesBytes.size() != colQualifiersBytes.size()) { + throw new IOException("Error in caching the bytes for the hbase column families " + + "and qualifiers."); + } 
+ } + + return rowKeyIndex; + } + + /** + * Get delegation token from hbase and add it to JobConf; does nothing unless HBase security is enabled + * @param job the job configuration checked for HBase security and passed to obtainAuthTokenForJob + * @throws IOException if interrupted while obtaining the token + */ + static void addHBaseDelegationToken(JobConf job) throws IOException { + if (User.isHBaseSecurityEnabled(job)) { + try { + User.getCurrent().obtainAuthTokenForJob(job); + } catch (InterruptedException e) { + throw new IOException("Error while obtaining hbase delegation token", e); + } + } + } + +} diff --git hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/HCatTableSnapshot.java hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/HCatTableSnapshot.java new file mode 100644 index 0000000..63d7414 --- /dev/null +++ hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/HCatTableSnapshot.java @@ -0,0 +1,92 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.hbase; + +import java.io.Serializable; +import java.util.Map; + + +/** + * The class HCatTableSnapshot represents a snapshot of a hcatalog table. + * This class is intended to be opaque. This class would be used only by the + * record readers to obtain knowledge about the revisions of a + * column to be filtered. 
+ */ +public class HCatTableSnapshot implements Serializable { + + private static final long serialVersionUID = 1L; + private String tableName; + private String databaseName; + private Map columnMap; + private long latestRevision; + + HCatTableSnapshot(String databaseName, String tableName, Map columnMap, long latestRevision) { + this.tableName = tableName; + this.databaseName = databaseName; + this.columnMap = columnMap; + this.latestRevision = latestRevision; + } + + /** + * @return The name of the table in the snapshot. + */ + public String getTableName() { + return this.tableName; + } + + /** + * @return The name of the database to which the table snapshot belongs. + */ + public String getDatabaseName() { + return this.databaseName; + } + + /** + * @return The revision number of a column in a snapshot, or the latest revision if the column has no entry. + */ + long getRevision(String column) { + if (columnMap.containsKey(column)) + return this.columnMap.get(column); + return latestRevision; + } + + /** + * The method checks if the snapshot contains information about a data column. 
+ * + * @param column The data column of the table + * @return true, if the snapshot holds a revision entry for the column + */ + boolean containsColumn(String column) { + return this.columnMap.containsKey(column); + } + + /** + * @return latest committed revision when snapshot was taken + */ + long getLatestRevision() { + return latestRevision; + } + + @Override + public String toString() { + String snapshot = " Database Name: " + this.databaseName + " Table Name : " + tableName + + " Latest Revision: " + latestRevision + " Column revision : " + columnMap.toString(); + return snapshot; + } +} diff --git hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/HbaseSnapshotRecordReader.java hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/HbaseSnapshotRecordReader.java new file mode 100644 index 0000000..5450a6f --- /dev/null +++ hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/HbaseSnapshotRecordReader.java @@ -0,0 +1,255 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.hive.hcatalog.hbase; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.client.HTable; +import org.apache.hadoop.hbase.client.Result; +import org.apache.hadoop.hbase.client.ResultScanner; +import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.io.DataInputBuffer; +import org.apache.hadoop.io.DataOutputBuffer; +import org.apache.hadoop.mapred.RecordReader; +import org.apache.hive.hcatalog.common.HCatUtil; +import org.apache.hive.hcatalog.hbase.snapshot.FamilyRevision; +import org.apache.hive.hcatalog.hbase.snapshot.RevisionManager; +import org.apache.hive.hcatalog.hbase.snapshot.TableSnapshot; +import org.apache.hive.hcatalog.mapreduce.InputJobInfo; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * The Class HbaseSnapshotRecordReader implements logic for filtering records + * based on snapshot. 
+ */ +class HbaseSnapshotRecordReader implements RecordReader { + + static final Logger LOG = LoggerFactory.getLogger(HbaseSnapshotRecordReader.class); + private final InputJobInfo inpJobInfo; + private final Configuration conf; + private final int maxRevisions = 1; + private ResultScanner scanner; + private Scan scan; + private HTable htable; + private TableSnapshot snapshot; + private Iterator resultItr; + private Set allAbortedTransactions; + private DataOutputBuffer valueOut = new DataOutputBuffer(); + private DataInputBuffer valueIn = new DataInputBuffer(); + + HbaseSnapshotRecordReader(InputJobInfo inputJobInfo, Configuration conf) throws IOException { + this.inpJobInfo = inputJobInfo; + this.conf = conf; + String snapshotString = conf.get(HBaseConstants.PROPERTY_TABLE_SNAPSHOT_KEY); + HCatTableSnapshot hcatSnapshot = (HCatTableSnapshot) HCatUtil + .deserialize(snapshotString); + this.snapshot = HBaseRevisionManagerUtil.convertSnapshot(hcatSnapshot, + inpJobInfo.getTableInfo()); + } + + public void init() throws IOException { + restart(scan.getStartRow()); + } + + public void restart(byte[] firstRow) throws IOException { + allAbortedTransactions = getAbortedTransactions(Bytes.toString(htable.getTableName()), scan); + long maxValidRevision = getMaximumRevision(scan, snapshot); + while (allAbortedTransactions.contains(maxValidRevision)) { + maxValidRevision--; + } + Scan newScan = new Scan(scan); + newScan.setStartRow(firstRow); + //TODO: See if filters in 0.92 can be used to optimize the scan + //TODO: Consider creating a custom snapshot filter + //TODO: Make min revision a constant in RM + newScan.setTimeRange(0, maxValidRevision + 1); + newScan.setMaxVersions(); + this.scanner = this.htable.getScanner(newScan); + resultItr = this.scanner.iterator(); + } + + private Set getAbortedTransactions(String tableName, Scan scan) throws IOException { + Set abortedTransactions = new HashSet(); + RevisionManager rm = null; + try { + rm = 
HBaseRevisionManagerUtil.getOpenedRevisionManager(conf); + byte[][] families = scan.getFamilies(); + for (byte[] familyKey : families) { + String family = Bytes.toString(familyKey); + List abortedWriteTransactions = rm.getAbortedWriteTransactions( + tableName, family); + if (abortedWriteTransactions != null) { + for (FamilyRevision revision : abortedWriteTransactions) { + abortedTransactions.add(revision.getRevision()); + } + } + } + return abortedTransactions; + } finally { + HBaseRevisionManagerUtil.closeRevisionManagerQuietly(rm); + } + } + + private long getMaximumRevision(Scan scan, TableSnapshot snapshot) { + long maxRevision = 0; + byte[][] families = scan.getFamilies(); + for (byte[] familyKey : families) { + String family = Bytes.toString(familyKey); + long revision = snapshot.getRevision(family); + if (revision > maxRevision) + maxRevision = revision; + } + return maxRevision; + } + + /* + * @param htable The HTable ( of HBase) to use for the record reader. + * + */ + public void setHTable(HTable htable) { + this.htable = htable; + } + + /* + * @param scan The scan to be used for reading records. + * + */ + public void setScan(Scan scan) { + this.scan = scan; + } + + @Override + public ImmutableBytesWritable createKey() { + return new ImmutableBytesWritable(); + } + + @Override + public Result createValue() { + return new Result(); + } + + @Override + public long getPos() { + // This should be the ordinal tuple in the range; + // not clear how to calculate... + return 0; + } + + @Override + public float getProgress() throws IOException { + // Depends on the total number of tuples + return 0; + } + + @Override + public boolean next(ImmutableBytesWritable key, Result value) throws IOException { + if (this.resultItr == null) { + LOG.warn("The HBase result iterator is found null. 
It is possible" + + " that the record reader has already been closed."); + } else { + while (resultItr.hasNext()) { + Result temp = resultItr.next(); + Result hbaseRow = prepareResult(temp.list()); + if (hbaseRow != null) { + // Update key and value. Currently no way to avoid serialization/de-serialization + // as no setters are available. + key.set(hbaseRow.getRow()); + valueOut.reset(); + hbaseRow.write(valueOut); + valueIn.reset(valueOut.getData(), valueOut.getLength()); + value.readFields(valueIn); + return true; + } + + } + } + return false; + } + + private Result prepareResult(List keyvalues) { + // Keeps, per family:qualifier, at most maxRevisions cells that are not aborted and not newer than the snapshot revision. + List finalKeyVals = new ArrayList(); + Map> qualValMap = new HashMap>(); + for (KeyValue kv : keyvalues) { + byte[] cf = kv.getFamily(); + byte[] qualifier = kv.getQualifier(); + String key = Bytes.toString(cf) + ":" + Bytes.toString(qualifier); + List kvs; + if (qualValMap.containsKey(key)) { + kvs = qualValMap.get(key); + } else { + kvs = new ArrayList(); + } + + String family = Bytes.toString(kv.getFamily()); + //Ignore aborted transactions + if (allAbortedTransactions.contains(kv.getTimestamp())) { + continue; + } + + long desiredTS = snapshot.getRevision(family); + if (kv.getTimestamp() <= desiredTS) { + kvs.add(kv); + } + qualValMap.put(key, kvs); + } + + Set keys = qualValMap.keySet(); + for (String cf : keys) { + List kvs = qualValMap.get(cf); + if (maxRevisions <= kvs.size()) { + for (int i = 0; i < maxRevisions; i++) { + finalKeyVals.add(kvs.get(i)); + } + } else { + finalKeyVals.addAll(kvs); + } + } + + if (finalKeyVals.size() == 0) { + return null; + } else { + KeyValue[] kvArray = new KeyValue[finalKeyVals.size()]; + finalKeyVals.toArray(kvArray); + return new Result(kvArray); + } + } + + /* + * @see org.apache.hadoop.hbase.mapred.TableRecordReader#close() + */ + @Override + public void close() { + this.resultItr = null; + if (this.scanner != null) this.scanner.close(); // scanner is only assigned by restart() + } + +} diff --git 
hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/ImportSequenceFile.java hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/ImportSequenceFile.java new file mode 100644 index 0000000..a84bf7f --- /dev/null +++ hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/ImportSequenceFile.java @@ -0,0 +1,252 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hive.hcatalog.hbase; + +import org.apache.hadoop.filecache.DistributedCache; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat; +import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles; +import org.apache.hadoop.hbase.mapreduce.PutSortReducer; +import org.apache.hadoop.hbase.mapreduce.hadoopbackport.TotalOrderPartitioner; + +import java.io.IOException; +import java.net.URI; +import java.util.Map; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.client.HTable; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.JobContext; +import org.apache.hadoop.mapreduce.JobStatus; +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mapreduce.OutputCommitter; +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.hadoop.mapreduce.TaskAttemptID; +import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; +import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat; +import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; +import org.apache.hive.hcatalog.mapreduce.HCatMapRedUtil; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import static org.apache.hadoop.hbase.mapreduce.hadoopbackport.TotalOrderPartitioner.DEFAULT_PATH; + + +/** + * MapReduce job which reads a series of Puts stored in a sequence file + * and imports the data into HBase. It needs to create the necessary HBase + * regions using HFileOutputFormat and then notify the correct region servers + * to doBulkLoad(). This will be used After an MR job has written the SequenceFile + * and data needs to be bulk loaded onto HBase. 
+ */ +class ImportSequenceFile { + private final static Logger LOG = LoggerFactory.getLogger(ImportSequenceFile.class); + private final static String NAME = "HCatImportSequenceFile"; + private final static String IMPORTER_WORK_DIR = "_IMPORTER_MR_WORK_DIR"; + + + private static class SequenceFileImporter extends Mapper { + + @Override + public void map(ImmutableBytesWritable rowKey, Put value, + Context context) + throws IOException { + try { + context.write(new ImmutableBytesWritable(value.getRow()), value); + } catch (InterruptedException e) { + throw new IOException("Interrupted while writing Put to importer output", e); + } + } + } + + private static class ImporterOutputFormat extends HFileOutputFormat { + @Override + public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException { + final OutputCommitter baseOutputCommitter = super.getOutputCommitter(context); + + return new OutputCommitter() { + @Override + public void setupJob(JobContext jobContext) throws IOException { + baseOutputCommitter.setupJob(jobContext); + } + + @Override + public void setupTask(TaskAttemptContext taskContext) throws IOException { + baseOutputCommitter.setupTask(taskContext); + } + + @Override + public boolean needsTaskCommit(TaskAttemptContext taskContext) throws IOException { + return baseOutputCommitter.needsTaskCommit(taskContext); + } + + @Override + public void commitTask(TaskAttemptContext taskContext) throws IOException { + baseOutputCommitter.commitTask(taskContext); + } + + @Override + public void abortTask(TaskAttemptContext taskContext) throws IOException { + baseOutputCommitter.abortTask(taskContext); + } + + @Override + public void abortJob(JobContext jobContext, JobStatus.State state) throws IOException { + try { + baseOutputCommitter.abortJob(jobContext, state); + } finally { + cleanupScratch(jobContext); + } + } + + @Override + public void commitJob(JobContext jobContext) throws IOException { + try { + baseOutputCommitter.commitJob(jobContext); + Configuration conf =
jobContext.getConfiguration(); + try { + //import hfiles + new LoadIncrementalHFiles(conf) + .doBulkLoad(HFileOutputFormat.getOutputPath(jobContext), + new HTable(conf, + conf.get(HBaseConstants.PROPERTY_OUTPUT_TABLE_NAME_KEY))); + } catch (Exception e) { + throw new IOException("BulkLoad failed.", e); + } + } finally { + cleanupScratch(jobContext); + } + } + + @Override + public void cleanupJob(JobContext context) throws IOException { + try { + baseOutputCommitter.cleanupJob(context); + } finally { + cleanupScratch(context); + } + } + + private void cleanupScratch(JobContext context) throws IOException { + FileSystem fs = FileSystem.get(context.getConfiguration()); + fs.delete(HFileOutputFormat.getOutputPath(context), true); + } + }; + } + } + + private static Job createSubmittableJob(Configuration conf, String tableName, Path inputDir, Path scratchDir, boolean localMode) + throws IOException { + Job job = new Job(conf, NAME + "_" + tableName); + job.setJarByClass(SequenceFileImporter.class); + FileInputFormat.setInputPaths(job, inputDir); + job.setInputFormatClass(SequenceFileInputFormat.class); + job.setMapperClass(SequenceFileImporter.class); + + HTable table = new HTable(conf, tableName); + job.setReducerClass(PutSortReducer.class); + FileOutputFormat.setOutputPath(job, scratchDir); + job.setMapOutputKeyClass(ImmutableBytesWritable.class); + job.setMapOutputValueClass(Put.class); + HFileOutputFormat.configureIncrementalLoad(job, table); + //override OutputFormatClass with our own so we can include cleanup in the committer + job.setOutputFormatClass(ImporterOutputFormat.class); + + //local mode doesn't support symbolic links so we have to manually set the actual path + if (localMode) { + String partitionFile = null; + for (URI uri : DistributedCache.getCacheFiles(job.getConfiguration())) { + if (DEFAULT_PATH.equals(uri.getFragment())) { + partitionFile = uri.toString(); + break; + } + } + partitionFile = partitionFile.substring(0,
partitionFile.lastIndexOf("#")); + job.getConfiguration().set(TotalOrderPartitioner.PARTITIONER_PATH, partitionFile.toString()); + } + + return job; + } + + /** + * Method to run the Importer MapReduce Job. Normally will be called by another MR job + * during OutputCommitter.commitJob(). + * @param parentContext JobContext of the parent job + * @param tableName name of table to bulk load data into + * @param InputDir path of SequenceFile formatted data to read + * @param scratchDir temporary path for the Importer MR job to build the HFiles which will be imported + * @return true if the importer MR job reported successful completion, false otherwise + */ + static boolean runJob(JobContext parentContext, String tableName, Path InputDir, Path scratchDir) { + Configuration parentConf = parentContext.getConfiguration(); + Configuration conf = new Configuration(); + for (Map.Entry el : parentConf) { + if (el.getKey().startsWith("hbase.")) + conf.set(el.getKey(), el.getValue()); + if (el.getKey().startsWith("mapred.cache.archives")) + conf.set(el.getKey(), el.getValue()); + } + + //Inherit jar dependencies added to distributed cache loaded by parent job + conf.set("mapred.job.classpath.archives", parentConf.get("mapred.job.classpath.archives", "")); + conf.set("mapreduce.job.cache.archives.visibilities", parentConf.get("mapreduce.job.cache.archives.visibilities", "")); + + //Temporary fix until hbase security is ready + //We need the written HFile to be world readable so + //hbase regionserver user has the privileges to perform a hdfs move + if (parentConf.getBoolean("hadoop.security.authorization", false)) { + FsPermission.setUMask(conf, FsPermission.valueOf("----------")); + } + + conf.set(HBaseConstants.PROPERTY_OUTPUT_TABLE_NAME_KEY, tableName); + conf.setBoolean(JobContext.JOB_CANCEL_DELEGATION_TOKEN, false); + + boolean localMode = "local".equals(conf.get("mapred.job.tracker")); + + boolean success = false; + try { + FileSystem fs = FileSystem.get(parentConf); + Path workDir = new Path(new Job(parentConf).getWorkingDirectory(),
IMPORTER_WORK_DIR); + if (!fs.mkdirs(workDir)) + throw new IOException("Importer work directory already exists: " + workDir); + Job job = createSubmittableJob(conf, tableName, InputDir, scratchDir, localMode); + job.setWorkingDirectory(workDir); + job.getCredentials().addAll(parentContext.getCredentials()); + success = job.waitForCompletion(true); + fs.delete(workDir, true); + //We only cleanup on success because failure might've been caused by existence of target directory + if (localMode && success) { + new ImporterOutputFormat().getOutputCommitter(HCatMapRedUtil.createTaskAttemptContext(conf, new TaskAttemptID())).commitJob(job); + } + } catch (InterruptedException e) { + LOG.error("ImportSequenceFile Failed", e); + } catch (ClassNotFoundException e) { + LOG.error("ImportSequenceFile Failed", e); + } catch (IOException e) { + LOG.error("ImportSequenceFile Failed", e); + } + return success; + } + +} diff --git hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/ResultConverter.java hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/ResultConverter.java new file mode 100644 index 0000000..4a7f0cb --- /dev/null +++ hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/ResultConverter.java @@ -0,0 +1,58 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied.
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.hbase; + +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.client.Result; +import org.apache.hive.hcatalog.data.HCatRecord; + +import java.io.IOException; + +/** + * Interface used to define conversion of HCatRecord to and from Native HBase write (Put) and read (Result) objects. + * How the actual mapping is defined between an HBase Table's schema and an HCatalog Table's schema + * is up to the underlying implementation. + */ +interface ResultConverter { + + /** + * Converts an HCatRecord instance to an HBase Put, used when writing out data. + * @param record instance to convert + * @return converted Put instance + * @throws IOException + */ + Put convert(HCatRecord record) throws IOException; + + /** + * Converts an HBase Result to an HCatRecord instance, used when reading data. + * @param result instance to convert + * @return converted HCatRecord instance + * @throws IOException + */ + HCatRecord convert(Result result) throws IOException; + + /** + * Returns the hbase columns that are required for the scan. + * @return String containing hbase columns delimited by space. + * @throws IOException + */ + String getHBaseScanColumns() throws IOException; + +} diff --git hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/snapshot/FamilyRevision.java hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/snapshot/FamilyRevision.java new file mode 100644 index 0000000..a4c4350 --- /dev/null +++ hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/snapshot/FamilyRevision.java @@ -0,0 +1,71 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.hbase.snapshot; + + +/** + * A FamilyRevision consists of a revision number and an expiration + * timestamp. When a write transaction starts, the transaction + * object is appended to the transaction list of each column + * family and stored in the corresponding znode. When a write transaction is + * committed, the transaction object is removed from the list. + */ +public class FamilyRevision implements + Comparable<FamilyRevision> { + + private long revision; + + private long timestamp; + + /** + * Create a FamilyRevision object + * @param rev revision number + * @param ts expiration timestamp + */ + FamilyRevision(long rev, long ts) { + this.revision = rev; + this.timestamp = ts; + } + + public long getRevision() { + return revision; + } + + public long getExpireTimestamp() { + return timestamp; + } + + void setExpireTimestamp(long ts) { + timestamp = ts; + } + + @Override + public String toString() { + String description = "revision: " + revision + " ts: " + timestamp; + return description; + } + + @Override + public int compareTo(FamilyRevision o) { + long other = o.getRevision(); + return (revision < other) ? -1 : (revision > other) ?
1 : 0; + } + + +} diff --git hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/snapshot/IDGenerator.java hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/snapshot/IDGenerator.java new file mode 100644 index 0000000..8881f03 --- /dev/null +++ hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/snapshot/IDGenerator.java @@ -0,0 +1,145 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.hbase.snapshot; + +import java.io.IOException; +import java.nio.charset.Charset; + +import org.apache.hive.hcatalog.hbase.snapshot.lock.LockListener; +import org.apache.hive.hcatalog.hbase.snapshot.lock.WriteLock; +import org.apache.zookeeper.KeeperException; +import org.apache.zookeeper.ZooDefs.Ids; +import org.apache.zookeeper.ZooKeeper; +import org.apache.zookeeper.data.Stat; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +/** + * This class generates revision id's for transactions.
+ */ +class IDGenerator implements LockListener { + + private ZooKeeper zookeeper; + private String zNodeDataLoc; + private String zNodeLockBasePath; + private long id; + private static final Logger LOG = LoggerFactory.getLogger(IDGenerator.class); + + IDGenerator(ZooKeeper zookeeper, String tableName, String idGenNode) + throws IOException { + this.zookeeper = zookeeper; + this.zNodeDataLoc = idGenNode; + this.zNodeLockBasePath = PathUtil.getLockManagementNode(idGenNode); + } + + /** + * This method obtains a revision id for a transaction. + * + * @return revision ID + * @throws IOException + */ + public long obtainID() throws IOException { + WriteLock wLock = new WriteLock(zookeeper, zNodeLockBasePath, Ids.OPEN_ACL_UNSAFE); + wLock.setLockListener(this); + try { + boolean lockGrabbed = wLock.lock(); + if (lockGrabbed == false) { + //TODO: Let this request queue up and retry obtaining the lock. + throw new IOException("Unable to obtain lock to obtain id."); + } else { + id = incrementAndReadCounter(); + } + } catch (KeeperException e) { + LOG.warn("Exception while obtaining lock for ID.", e); + throw new IOException("Exception while obtaining lock for ID.", e); + } catch (InterruptedException e) { + LOG.warn("Exception while obtaining lock for ID.", e); + throw new IOException("Exception while obtaining lock for ID.", e); + } finally { + wLock.unlock(); + } + return id; + } + + /** + * This method reads the latest revision ID that has been used. The ID + * returned by this method cannot be used for a transaction.
+ * @return revision ID + * @throws IOException + */ + public long readID() throws IOException { + long curId; + try { + Stat stat = new Stat(); + byte[] data = zookeeper.getData(this.zNodeDataLoc, false, stat); + curId = Long.parseLong(new String(data, Charset.forName("UTF-8"))); + } catch (KeeperException e) { + LOG.warn("Exception while reading current revision id.", e); + throw new IOException("Exception while reading current revision id.", e); + } catch (InterruptedException e) { + LOG.warn("Exception while reading current revision id.", e); + throw new IOException("Exception while reading current revision id.", e); + } + + return curId; + } + + + private long incrementAndReadCounter() throws IOException { + + long curId, usedId; + try { + Stat stat = new Stat(); + byte[] data = zookeeper.getData(this.zNodeDataLoc, false, stat); + usedId = Long.parseLong((new String(data, Charset.forName("UTF-8")))); + curId = usedId + 1; + String lastUsedID = String.valueOf(curId); + zookeeper.setData(this.zNodeDataLoc, lastUsedID.getBytes(Charset.forName("UTF-8")), -1); + + } catch (KeeperException e) { + LOG.warn("Exception while incrementing revision id.", e); + throw new IOException("Exception while incrementing revision id. ", e); + } catch (InterruptedException e) { + LOG.warn("Exception while incrementing revision id.", e); + throw new IOException("Exception while incrementing revision id.
", e); + } + + return curId; + } + + /* + * @see org.apache.hive.hcatalog.hbase.snapshot.lock.LockListener#lockAcquired() + */ + @Override + public void lockAcquired() { + + + } + + /* + * @see org.apache.hive.hcatalog.hbase.snapshot.lock.LockListener#lockReleased() + */ + @Override + public void lockReleased() { + + } + + +} diff --git hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/snapshot/PathUtil.java hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/snapshot/PathUtil.java new file mode 100644 index 0000000..94e9975 --- /dev/null +++ hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/snapshot/PathUtil.java @@ -0,0 +1,132 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.hbase.snapshot; + + +/** + * The PathUtil class is a utility class to provide information about various + * znode paths. The following is the znode structure used for storing information.
+ * baseDir/clock + * baseDir/data + * baseDir/data/TableA/idgen + * baseDir/data/TableA/columnFamily-1 + * baseDir/data/TableA/columnFamily-1/runningTxns + * baseDir/data/TableA/columnFamily-1/abortData + * baseDir/data/TableB/idgen + * baseDir/data/TableB/columnFamily-1 + * baseDir/data/TableB/columnFamily-1/runningTxns + * baseDir/data/TableB/columnFamily-1/abortData + + */ +public class PathUtil { + + static final String DATA_DIR = "/data"; + static final String CLOCK_NODE = "/clock"; + + /** + * This method returns the data path associated with the currently + * running transactions of a given table and column/column family. + * @param baseDir The base directory for revision management. + * @param tableName The name of the table. + * @param columnFamily The column family name. + * @return The path of the running transactions data. + */ + static String getRunningTxnInfoPath(String baseDir, String tableName, + String columnFamily) { + String txnBasePath = getTransactionBasePath(baseDir); + String path = txnBasePath + "/" + tableName + "/" + columnFamily + + "/runningTxns"; + return path; + } + + /** + * This method returns the data path associated with the aborted + * transactions of a given table and column/column family. + * @param baseDir The base directory for revision management. + * @param tableName The name of the table. + * @param columnFamily The column family name. + * @return The path of the aborted transactions data. + */ + static String getAbortInformationPath(String baseDir, String tableName, + String columnFamily) { + String txnBasePath = getTransactionBasePath(baseDir); + String path = txnBasePath + "/" + tableName + "/" + columnFamily + + "/abortData"; + return path; + } + + /** + * Gets the revision id node for a given table. + * + * @param baseDir the base dir for revision management. + * @param tableName the table name + * @return the revision id node path.
+ */ + static String getRevisionIDNode(String baseDir, String tableName) { + String rmBasePath = getTransactionBasePath(baseDir); + String revisionIDNode = rmBasePath + "/" + tableName + "/idgen"; + return revisionIDNode; + } + + /** + * Gets the lock management node for any znode that needs to be locked. + * + * @param path the path of the znode. + * @return the lock management node path. + */ + static String getLockManagementNode(String path) { + String lockNode = path + "_locknode_"; + return lockNode; + } + + /** + * This method returns the base path for the transaction data. + * + * @param baseDir The base dir for revision management. + * @return The base path for the transaction data. + */ + static String getTransactionBasePath(String baseDir) { + String txnBaseNode = baseDir + DATA_DIR; + return txnBaseNode; + } + + /** + * Gets the txn data path for a given table. + * + * @param baseDir the base dir for revision management. + * @param tableName the table name + * @return the txn data path for the table. + */ + static String getTxnDataPath(String baseDir, String tableName) { + String txnBasePath = getTransactionBasePath(baseDir); + String path = txnBasePath + "/" + tableName; + return path; + } + + /** + * This method returns the data path for clock node. + * + * @param baseDir The base dir for revision management. + * @return The data path for clock. 
+ */ + static String getClockPath(String baseDir) { + String clockNode = baseDir + CLOCK_NODE; + return clockNode; + } +} diff --git hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/snapshot/RMConstants.java hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/snapshot/RMConstants.java new file mode 100644 index 0000000..494457e --- /dev/null +++ hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/snapshot/RMConstants.java @@ -0,0 +1,30 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements.
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.hbase.snapshot; + +public class RMConstants { + public static final String REVISION_MGR_ENDPOINT_IMPL_CLASS = "revision.manager.endpoint.impl.class"; + + public static final String WRITE_TRANSACTION_TIMEOUT = "revision.manager.writeTxn.timeout"; + + public static final String ZOOKEEPER_HOSTLIST = "revision.manager.zk.hostList"; + + public static final String ZOOKEEPER_DATADIR = "revision.manager.zk.dataDir"; +} diff --git hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/snapshot/RevisionManager.java hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/snapshot/RevisionManager.java new file mode 100644 index 0000000..d147d56 --- /dev/null +++ hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/snapshot/RevisionManager.java @@ -0,0 +1,148 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.hbase.snapshot; + +import org.apache.hadoop.conf.Configuration; + +import java.io.IOException; +import java.util.List; + +/** + * This interface provides APIs for implementing revision management. + */ +public interface RevisionManager { + /** + * Version property required by HBase to use this interface + * for CoprocessorProtocol / RPC. + */ + public static final long VERSION = 1L; // do not change + + /** + * Initialize the revision manager. + */ + public void initialize(Configuration conf); + + /** + * Opens the revision manager. + * + * @throws IOException + */ + public void open() throws IOException; + + /** + * Closes the revision manager. + * + * @throws IOException + */ + public void close() throws IOException; + + /** + * Setup revision management for a newly created hbase table. + * @param table the hbase table name + * @param columnFamilies the column families in the table + */ + public void createTable(String table, List columnFamilies) throws IOException; + + /** + * Remove table data from revision manager for a dropped table. + * @param table the hbase table name + */ + public void dropTable(String table) throws IOException; + + /** + * Start the write transaction. + * + * @param table the hbase table name + * @param families the column families the transaction is started on + * @return a new Transaction + * @throws IOException + */ + public Transaction beginWriteTransaction(String table, List families) + throws IOException; + + /** + * Start the write transaction.
+ * + * @param table the hbase table name + * @param families the column families the transaction is started on + * @param keepAlive keep-alive period for the transaction (unit not visible here; confirm with the implementation) + * @return a new Transaction + * @throws IOException + */ + public Transaction beginWriteTransaction(String table, + List families, long keepAlive) throws IOException; + + /** + * Commit the write transaction. + * + * @param transaction the transaction to commit + * @throws IOException + */ + public void commitWriteTransaction(Transaction transaction) + throws IOException; + + /** + * Abort the write transaction. + * + * @param transaction the transaction to abort + * @throws IOException + */ + public void abortWriteTransaction(Transaction transaction) + throws IOException; + + /** + * Get the list of aborted Transactions for a column family + * + * @param table the table name + * @param columnFamily the column family name + * @return a list of aborted WriteTransactions + * @throws java.io.IOException + */ + public List getAbortedWriteTransactions(String table, + String columnFamily) throws IOException; + + /** + * Create the latest snapshot of the table. + * + * @param tableName the table name + * @return a new snapshot + * @throws IOException + */ + public TableSnapshot createSnapshot(String tableName) throws IOException; + + /** + * Create the snapshot of the table using the revision number. + * + * @param tableName the table name + * @param revision the revision number to create the snapshot with + * @return a new snapshot + * @throws IOException + */ + public TableSnapshot createSnapshot(String tableName, long revision) + throws IOException; + + /** + * Extends the expiration of a transaction by the time indicated by keep alive.
+ * + * @param transaction the transaction whose expiration is extended + * @throws IOException + */ + public void keepAlive(Transaction transaction) throws IOException; + +} diff --git hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/snapshot/RevisionManagerConfiguration.java hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/snapshot/RevisionManagerConfiguration.java new file mode 100644 index 0000000..93ae8ca --- /dev/null +++ hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/snapshot/RevisionManagerConfiguration.java @@ -0,0 +1,59 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ */ + +package org.apache.hive.hcatalog.hbase.snapshot; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.HBaseConfiguration; + +public class RevisionManagerConfiguration { + + + public static Configuration addResources(Configuration conf) { + conf.addDefaultResource("revision-manager-default.xml"); + conf.addResource("revision-manager-site.xml"); + return conf; + } + + /** + * Creates a Configuration with Revision Manager resources + * @return a Configuration with Revision Manager resources + */ + public static Configuration create() { + Configuration conf = new Configuration(); + return addResources(conf); + } + + /** + * Creates a clone of the passed configuration. + * @param that Configuration to clone. + * @return a Configuration created with the revision-manager-*.xml files plus + * the given configuration. + */ + public static Configuration create(final Configuration that) { + Configuration conf = create(); + //we need to merge things instead of doing new Configuration(that) + //because of a bug in Configuration wherein the config + //set on the MR frontend will get loaded on the backend as a resource called job.xml + //hence adding resources on the backend could potentially overwrite properties + //set on the frontend which we shouldn't be doing here + HBaseConfiguration.merge(conf, that); + return conf; + } +} diff --git hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/snapshot/RevisionManagerEndpoint.java hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/snapshot/RevisionManagerEndpoint.java new file mode 100644 index 0000000..a761566 --- /dev/null +++ hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/snapshot/RevisionManagerEndpoint.java @@ -0,0 +1,141 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements.
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.hbase.snapshot; + +import java.io.IOException; +import java.util.List; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.CoprocessorEnvironment; +import org.apache.hadoop.hbase.coprocessor.BaseEndpointCoprocessor; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Implementation of RevisionManager as HBase RPC endpoint. This class will control the lifecycle of + * and delegate to the actual RevisionManager implementation and make it available as a service + * hosted in the HBase region server (instead of running it in the client (storage handler). + * In the case of {@link ZKBasedRevisionManager} now only the region servers need write access to + * manage revision data. 
+ */ +public class RevisionManagerEndpoint extends BaseEndpointCoprocessor implements RevisionManagerProtocol { + + private static final Logger LOGGER = + LoggerFactory.getLogger(RevisionManagerEndpoint.class.getName()); + + private RevisionManager rmImpl = null; + + @Override + public void start(CoprocessorEnvironment env) { + super.start(env); + try { + Configuration conf = RevisionManagerConfiguration.create(env.getConfiguration()); + String className = conf.get(RMConstants.REVISION_MGR_ENDPOINT_IMPL_CLASS, + ZKBasedRevisionManager.class.getName()); + LOGGER.debug("Using Revision Manager implementation: {}", className); + rmImpl = RevisionManagerFactory.getOpenedRevisionManager(className, conf); + } catch (IOException e) { + LOGGER.error("Failed to initialize revision manager", e); + } + } + + @Override + public void stop(CoprocessorEnvironment env) { + if (rmImpl != null) { + try { + rmImpl.close(); + } catch (IOException e) { + LOGGER.warn("Error closing revision manager.", e); + } + } + super.stop(env); + } + + @Override + public void initialize(Configuration conf) { + // do nothing, HBase controls life cycle + } + + @Override + public void open() throws IOException { + // do nothing, HBase controls life cycle + } + + @Override + public void close() throws IOException { + // do nothing, HBase controls life cycle + } + + @Override + public void createTable(String table, List columnFamilies) throws IOException { + rmImpl.createTable(table, columnFamilies); + } + + @Override + public void dropTable(String table) throws IOException { + rmImpl.dropTable(table); + } + + @Override + public Transaction beginWriteTransaction(String table, List families) + throws IOException { + return rmImpl.beginWriteTransaction(table, families); + } + + @Override + public Transaction beginWriteTransaction(String table, + List families, long keepAlive) throws IOException { + return rmImpl.beginWriteTransaction(table, families, keepAlive); + } + + @Override + public void 
commitWriteTransaction(Transaction transaction) + throws IOException { + rmImpl.commitWriteTransaction(transaction); + } + + @Override + public void abortWriteTransaction(Transaction transaction) + throws IOException { + rmImpl.abortWriteTransaction(transaction); + } + + @Override + public TableSnapshot createSnapshot(String tableName) throws IOException { + return rmImpl.createSnapshot(tableName); + } + + @Override + public TableSnapshot createSnapshot(String tableName, long revision) + throws IOException { + return rmImpl.createSnapshot(tableName, revision); + } + + @Override + public void keepAlive(Transaction transaction) throws IOException { + rmImpl.keepAlive(transaction); + } + + @Override + public List getAbortedWriteTransactions(String table, + String columnFamily) throws IOException { + return rmImpl.getAbortedWriteTransactions(table, columnFamily); + } + +} diff --git hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/snapshot/RevisionManagerEndpointClient.java hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/snapshot/RevisionManagerEndpointClient.java new file mode 100644 index 0000000..fd7e6ae --- /dev/null +++ hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/snapshot/RevisionManagerEndpointClient.java @@ -0,0 +1,125 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.hbase.snapshot; + +import java.io.IOException; +import java.util.List; + +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.client.HTable; +import org.apache.hadoop.hbase.util.Bytes; + +/** + * This class is nothing but a delegate for the enclosed proxy, + * which is created upon setting the configuration. + */ +public class RevisionManagerEndpointClient implements RevisionManager, Configurable { + + private Configuration conf = null; + private RevisionManager rmProxy; + + @Override + public Configuration getConf() { + return this.conf; + } + + @Override + public void setConf(Configuration arg0) { + this.conf = arg0; + } + + @Override + public void initialize(Configuration conf) { + // do nothing + } + + @Override + public void open() throws IOException { + // clone to adjust RPC settings unique to proxy + Configuration clonedConf = new Configuration(conf); + // conf.set("hbase.ipc.client.connect.max.retries", "0"); + // conf.setInt(HConstants.HBASE_CLIENT_RPC_MAXATTEMPTS, 1); + clonedConf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 1); // do not retry RPC + HTable table = new HTable(clonedConf, HConstants.ROOT_TABLE_NAME); + rmProxy = table.coprocessorProxy(RevisionManagerProtocol.class, + Bytes.toBytes("anyRow")); + rmProxy.open(); + } + + @Override + public void close() throws IOException { + rmProxy.close(); + } + + @Override + public void createTable(String table, List columnFamilies) 
throws IOException { + rmProxy.createTable(table, columnFamilies); + } + + @Override + public void dropTable(String table) throws IOException { + rmProxy.dropTable(table); + } + + @Override + public Transaction beginWriteTransaction(String table, List families) throws IOException { + return rmProxy.beginWriteTransaction(table, families); + } + + @Override + public Transaction beginWriteTransaction(String table, List families, long keepAlive) + throws IOException { + return rmProxy.beginWriteTransaction(table, families, keepAlive); + } + + @Override + public void commitWriteTransaction(Transaction transaction) throws IOException { + rmProxy.commitWriteTransaction(transaction); + } + + @Override + public void abortWriteTransaction(Transaction transaction) throws IOException { + rmProxy.abortWriteTransaction(transaction); + } + + @Override + public List getAbortedWriteTransactions(String table, String columnFamily) + throws IOException { + return rmProxy.getAbortedWriteTransactions(table, columnFamily); + } + + @Override + public TableSnapshot createSnapshot(String tableName) throws IOException { + return rmProxy.createSnapshot(tableName); + } + + @Override + public TableSnapshot createSnapshot(String tableName, long revision) throws IOException { + return rmProxy.createSnapshot(tableName, revision); + } + + @Override + public void keepAlive(Transaction transaction) throws IOException { + rmProxy.keepAlive(transaction); + } + +} diff --git hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/snapshot/RevisionManagerFactory.java hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/snapshot/RevisionManagerFactory.java new file mode 100644 index 0000000..9a64c58 --- /dev/null +++ hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/snapshot/RevisionManagerFactory.java @@ -0,0 +1,105 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.hbase.snapshot; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.conf.Configuration; + +/** + * Utility to instantiate the revision manager (not a true factory actually). + * Depends on HBase configuration to resolve ZooKeeper connection (when ZK is used). + */ +public class RevisionManagerFactory { + + public static final String REVISION_MGR_IMPL_CLASS = "revision.manager.impl.class"; + + /** + * Gets an instance of revision manager. + * + * @param conf The configuration required to created the revision manager. + * @return the revision manager An instance of revision manager. + * @throws IOException Signals that an I/O exception has occurred. 
+ */ + private static RevisionManager getRevisionManager(String className, Configuration conf) throws IOException { + + RevisionManager revisionMgr; + ClassLoader classLoader = Thread.currentThread() + .getContextClassLoader(); + if (classLoader == null) { + classLoader = RevisionManagerFactory.class.getClassLoader(); + } + try { + Class revisionMgrClass = Class + .forName(className, true, classLoader).asSubclass(RevisionManager.class); + revisionMgr = (RevisionManager) revisionMgrClass.newInstance(); + revisionMgr.initialize(conf); + } catch (ClassNotFoundException e) { + throw new IOException( + "The implementation class of revision manager not found.", + e); + } catch (InstantiationException e) { + throw new IOException( + "Exception encountered during instantiating revision manager implementation.", + e); + } catch (IllegalAccessException e) { + throw new IOException( + "IllegalAccessException encountered during instantiating revision manager implementation.", + e); + } catch (IllegalArgumentException e) { + throw new IOException( + "IllegalArgumentException encountered during instantiating revision manager implementation.", + e); + } + return revisionMgr; + } + + /** + * Internally used by endpoint implementation to instantiate from different configuration setting. + * @param className + * @param conf + * @return the opened revision manager + * @throws IOException + */ + static RevisionManager getOpenedRevisionManager(String className, Configuration conf) throws IOException { + + RevisionManager revisionMgr = RevisionManagerFactory.getRevisionManager(className, conf); + if (revisionMgr instanceof Configurable) { + ((Configurable) revisionMgr).setConf(conf); + } + revisionMgr.open(); + return revisionMgr; + } + + /** + * Gets an instance of revision manager which is opened. + * The revision manager implementation can be specified as {@link #REVISION_MGR_IMPL_CLASS}, + * default is {@link ZKBasedRevisionManager}. 
+ * @param conf revision manager configuration + * @return RevisionManager An instance of revision manager. + * @throws IOException + */ + public static RevisionManager getOpenedRevisionManager(Configuration conf) throws IOException { + String className = conf.get(RevisionManagerFactory.REVISION_MGR_IMPL_CLASS, + ZKBasedRevisionManager.class.getName()); + return getOpenedRevisionManager(className, conf); + } + +} diff --git hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/snapshot/RevisionManagerProtocol.java hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/snapshot/RevisionManagerProtocol.java new file mode 100644 index 0000000..a34e277 --- /dev/null +++ hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/snapshot/RevisionManagerProtocol.java @@ -0,0 +1,30 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.hbase.snapshot; + +import org.apache.hadoop.hbase.ipc.CoprocessorProtocol; + +/** + * Interface marker to implement RevisionManager as Coprocessor. 
+ * (needs to extend CoprocessorProtocol) + */ +public interface RevisionManagerProtocol extends RevisionManager, + CoprocessorProtocol { + +} diff --git hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/snapshot/TableSnapshot.java hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/snapshot/TableSnapshot.java new file mode 100644 index 0000000..1c500b5 --- /dev/null +++ hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/snapshot/TableSnapshot.java @@ -0,0 +1,90 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.hbase.snapshot; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +/** + * The snapshot for a table and a list of column families. 
/**
 * The snapshot for a table and a list of column families: a revision per
 * column family plus the latest revision at the time the snapshot was taken.
 *
 * <p>The class is {@link Serializable} and declares no serialVersionUID, so
 * field names, field modifiers and method descriptors are deliberately left
 * as they were; only generic type arguments (erased at runtime) were added.
 */
public class TableSnapshot implements Serializable {

  private String name;

  /** Revision per column family name; never null. */
  private Map<String, Long> cfRevisionMap;

  private long latestRevision;


  /**
   * @param name the table name
   * @param cfRevMap revision per column family name; stored by reference, not copied
   * @param latestRevision revision reported for families absent from the map
   * @throws IllegalArgumentException if {@code cfRevMap} is null
   */
  public TableSnapshot(String name, Map<String, Long> cfRevMap, long latestRevision) {
    // Validate before assigning so a rejected call leaves nothing half-built.
    if (cfRevMap == null) {
      throw new IllegalArgumentException("revision map cannot be null");
    }
    this.name = name;
    this.cfRevisionMap = cfRevMap;
    this.latestRevision = latestRevision;
  }

  /**
   * Gets the table name.
   *
   * @return String The name of the table.
   */
  public String getTableName() {
    return name;
  }

  /**
   * Gets the column families.
   *
   * @return List A new list of the column families associated with the snapshot.
   */
  public List<String> getColumnFamilies() {
    return new ArrayList<String>(this.cfRevisionMap.keySet());
  }

  /**
   * Gets the revision.
   *
   * @param familyName The name of the column family.
   * @return the revision recorded for the family, or the latest revision when
   *         the family has no (or a null) entry in the snapshot
   */
  public long getRevision(String familyName) {
    // Single lookup; a null result covers both "absent" and "mapped to null",
    // the latter of which would otherwise fail while unboxing.
    Long revision = cfRevisionMap.get(familyName);
    if (revision != null) {
      return revision.longValue();
    }
    return latestRevision;
  }

  /**
   * @return the latest committed revision when this snapshot was taken
   */
  public long getLatestRevision() {
    return latestRevision;
  }

  @Override
  public String toString() {
    String snapshot = "Table Name : " + name + " Latest Revision: " + latestRevision
      + " Column Familiy revision : " + cfRevisionMap.toString();
    return snapshot;
  }
}
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.hbase.snapshot; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.List; + +/** + * This class is responsible for storing information related to + * transactions. + */ +public class Transaction implements Serializable { + + private String tableName; + private List columnFamilies = new ArrayList(); + private long timeStamp; + private long keepAlive; + private long revision; + + + Transaction(String tableName, List columnFamilies, long revision, long timestamp) { + this.tableName = tableName; + this.columnFamilies = columnFamilies; + this.timeStamp = timestamp; + this.revision = revision; + } + + /** + * @return The revision number associated with a transaction. + */ + public long getRevisionNumber() { + return this.revision; + } + + /** + * @return The table name associated with a transaction. + */ + public String getTableName() { + return tableName; + } + + /** + * @return The column families associated with a transaction. + */ + public List getColumnFamilies() { + return columnFamilies; + } + + /** + * @return The expire timestamp associated with a transaction. 
+ */ + long getTransactionExpireTimeStamp() { + return this.timeStamp + this.keepAlive; + } + + void setKeepAlive(long seconds) { + this.keepAlive = seconds; + } + + /** + * Gets the keep alive value. + * + * @return long The keep alive value for the transaction. + */ + public long getKeepAliveValue() { + return this.keepAlive; + } + + /** + * Gets the family revision info. + * + * @return FamilyRevision An instance of FamilyRevision associated with the transaction. + */ + FamilyRevision getFamilyRevisionInfo() { + return new FamilyRevision(revision, getTransactionExpireTimeStamp()); + } + + /** + * Keep alive transaction. This methods extends the expire timestamp of a + * transaction by the "keep alive" amount. + */ + void keepAliveTransaction() { + this.timeStamp = this.timeStamp + this.keepAlive; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("Revision : "); + sb.append(this.getRevisionNumber()); + sb.append(" Timestamp : "); + sb.append(this.getTransactionExpireTimeStamp()); + sb.append("\n").append("Table : "); + sb.append(this.tableName).append("\n"); + sb.append("Column Families : "); + sb.append(this.columnFamilies.toString()); + return sb.toString(); + } +} diff --git hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/snapshot/ZKBasedRevisionManager.java hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/snapshot/ZKBasedRevisionManager.java new file mode 100644 index 0000000..b93e75d --- /dev/null +++ hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/snapshot/ZKBasedRevisionManager.java @@ -0,0 +1,461 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.hbase.snapshot; + +import java.io.IOException; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hive.hcatalog.hbase.snapshot.lock.LockListener; +import org.apache.hive.hcatalog.hbase.snapshot.lock.WriteLock; +import org.apache.zookeeper.CreateMode; +import org.apache.zookeeper.KeeperException; +import org.apache.zookeeper.ZooDefs.Ids; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * The service for providing revision management to Hbase tables. 
+ */ +public class ZKBasedRevisionManager implements RevisionManager { + + private static final Logger LOG = LoggerFactory.getLogger(ZKBasedRevisionManager.class); + private String zkHostList; + private String baseDir; + private ZKUtil zkUtil; + private long writeTxnTimeout; + + + /* + * @see org.apache.hive.hcatalog.hbase.snapshot.RevisionManager#initialize() + */ + @Override + public void initialize(Configuration conf) { + conf = new Configuration(conf); + if (conf.get(RMConstants.ZOOKEEPER_HOSTLIST) == null) { + String zkHostList = conf.get(HConstants.ZOOKEEPER_QUORUM); + int port = conf.getInt(HConstants.ZOOKEEPER_CLIENT_PORT, + HConstants.DEFAULT_ZOOKEPER_CLIENT_PORT); + String[] splits = zkHostList.split(","); + StringBuffer sb = new StringBuffer(); + for (String split : splits) { + sb.append(split); + sb.append(':'); + sb.append(port); + sb.append(','); + } + sb.deleteCharAt(sb.length() - 1); + conf.set(RMConstants.ZOOKEEPER_HOSTLIST, sb.toString()); + } + this.zkHostList = conf.get(RMConstants.ZOOKEEPER_HOSTLIST); + this.baseDir = conf.get(RMConstants.ZOOKEEPER_DATADIR); + this.writeTxnTimeout = Long.parseLong(conf.get(RMConstants.WRITE_TRANSACTION_TIMEOUT)); + } + + /** + * Open a ZooKeeper connection + * @throws java.io.IOException + */ + + public void open() throws IOException { + zkUtil = new ZKUtil(zkHostList, this.baseDir); + zkUtil.createRootZNodes(); + LOG.info("Created root znodes for revision manager."); + } + + /** + * Close Zookeeper connection + */ + public void close() { + zkUtil.closeZKConnection(); + } + + private void checkInputParams(String table, List families) { + if (table == null) { + throw new IllegalArgumentException( + "The table name must be specified for reading."); + } + if (families == null || families.isEmpty()) { + throw new IllegalArgumentException( + "At least one column family should be specified for reading."); + } + } + + @Override + public void createTable(String table, List columnFamilies) throws IOException { + 
zkUtil.createRootZNodes(); + zkUtil.setUpZnodesForTable(table, columnFamilies); + } + + @Override + public void dropTable(String table) throws IOException { + zkUtil.deleteZNodes(table); + } + + /* @param table + /* @param families + /* @param keepAlive + /* @return + /* @throws IOException + * @see org.apache.hive.hcatalog.hbase.snapshot.RevisionManager#beginWriteTransaction(java.lang.String, java.util.List, long) + */ + public Transaction beginWriteTransaction(String table, + List families, long keepAlive) throws IOException { + + checkInputParams(table, families); + zkUtil.setUpZnodesForTable(table, families); + long nextId = zkUtil.nextId(table); + long expireTimestamp = zkUtil.getTimeStamp(); + Transaction transaction = new Transaction(table, families, nextId, + expireTimestamp); + if (keepAlive != -1) { + transaction.setKeepAlive(keepAlive); + } else { + transaction.setKeepAlive(writeTxnTimeout); + } + + refreshTransactionList(transaction.getTableName()); + String lockPath = prepareLockNode(table); + WriteLock wLock = new WriteLock(zkUtil.getSession(), lockPath, + Ids.OPEN_ACL_UNSAFE); + RMLockListener myLockListener = new RMLockListener(); + wLock.setLockListener(myLockListener); + try { + boolean lockGrabbed = wLock.lock(); + if (lockGrabbed == false) { + //TO DO : Let this request queue up and try obtaining lock. + throw new IOException( + "Unable to obtain lock while beginning transaction. 
" + + transaction.toString()); + } else { + List colFamilies = transaction.getColumnFamilies(); + FamilyRevision revisionData = transaction.getFamilyRevisionInfo(); + for (String cfamily : colFamilies) { + String path = PathUtil.getRunningTxnInfoPath( + baseDir, table, cfamily); + zkUtil.updateData(path, revisionData, + ZKUtil.UpdateMode.APPEND); + } + } + } catch (KeeperException e) { + throw new IOException("Exception while obtaining lock.", e); + } catch (InterruptedException e) { + throw new IOException("Exception while obtaining lock.", e); + } finally { + wLock.unlock(); + } + + return transaction; + } + + /* @param table The table name. + /* @param families The column families involved in the transaction. + /* @return transaction The transaction which was started. + /* @throws IOException + * @see org.apache.hive.hcatalog.hbase.snapshot.RevisionManager#beginWriteTransaction(java.lang.String, java.util.List) + */ + public Transaction beginWriteTransaction(String table, List families) + throws IOException { + return beginWriteTransaction(table, families, -1); + } + + /** + * This method commits a write transaction. + * @param transaction The revision information associated with transaction. + * @throws java.io.IOException + */ + public void commitWriteTransaction(Transaction transaction) throws IOException { + refreshTransactionList(transaction.getTableName()); + + String lockPath = prepareLockNode(transaction.getTableName()); + WriteLock wLock = new WriteLock(zkUtil.getSession(), lockPath, + Ids.OPEN_ACL_UNSAFE); + RMLockListener myLockListener = new RMLockListener(); + wLock.setLockListener(myLockListener); + try { + boolean lockGrabbed = wLock.lock(); + if (lockGrabbed == false) { + //TO DO : Let this request queue up and try obtaining lock. + throw new IOException( + "Unable to obtain lock while commiting transaction. 
" + + transaction.toString()); + } else { + String tableName = transaction.getTableName(); + List colFamilies = transaction.getColumnFamilies(); + FamilyRevision revisionData = transaction.getFamilyRevisionInfo(); + for (String cfamily : colFamilies) { + String path = PathUtil.getRunningTxnInfoPath( + baseDir, tableName, cfamily); + zkUtil.updateData(path, revisionData, + ZKUtil.UpdateMode.REMOVE); + } + + } + } catch (KeeperException e) { + throw new IOException("Exception while obtaining lock.", e); + } catch (InterruptedException e) { + throw new IOException("Exception while obtaining lock.", e); + } finally { + wLock.unlock(); + } + LOG.info("Write Transaction committed: " + transaction.toString()); + } + + /** + * This method aborts a write transaction. + * @param transaction + * @throws java.io.IOException + */ + public void abortWriteTransaction(Transaction transaction) throws IOException { + + refreshTransactionList(transaction.getTableName()); + String lockPath = prepareLockNode(transaction.getTableName()); + WriteLock wLock = new WriteLock(zkUtil.getSession(), lockPath, + Ids.OPEN_ACL_UNSAFE); + RMLockListener myLockListener = new RMLockListener(); + wLock.setLockListener(myLockListener); + try { + boolean lockGrabbed = wLock.lock(); + if (lockGrabbed == false) { + //TO DO : Let this request queue up and try obtaining lock. + throw new IOException( + "Unable to obtain lock while aborting transaction. 
" + + transaction.toString()); + } else { + String tableName = transaction.getTableName(); + List colFamilies = transaction.getColumnFamilies(); + FamilyRevision revisionData = transaction + .getFamilyRevisionInfo(); + for (String cfamily : colFamilies) { + String path = PathUtil.getRunningTxnInfoPath( + baseDir, tableName, cfamily); + zkUtil.updateData(path, revisionData, + ZKUtil.UpdateMode.REMOVE); + path = PathUtil.getAbortInformationPath(baseDir, + tableName, cfamily); + zkUtil.updateData(path, revisionData, + ZKUtil.UpdateMode.APPEND); + } + + } + } catch (KeeperException e) { + throw new IOException("Exception while obtaining lock.", e); + } catch (InterruptedException e) { + throw new IOException("Exception while obtaining lock.", e); + } finally { + wLock.unlock(); + } + LOG.info("Write Transaction aborted: " + transaction.toString()); + } + + + /* @param transaction + /* @throws IOException + * @see org.apache.hive.hcatalog.hbase.snapshot.RevsionManager#keepAlive(org.apache.hive.hcatalog.hbase.snapshot.Transaction) + */ + public void keepAlive(Transaction transaction) + throws IOException { + + refreshTransactionList(transaction.getTableName()); + transaction.keepAliveTransaction(); + String lockPath = prepareLockNode(transaction.getTableName()); + WriteLock wLock = new WriteLock(zkUtil.getSession(), lockPath, + Ids.OPEN_ACL_UNSAFE); + RMLockListener myLockListener = new RMLockListener(); + wLock.setLockListener(myLockListener); + try { + boolean lockGrabbed = wLock.lock(); + if (lockGrabbed == false) { + //TO DO : Let this request queue up and try obtaining lock. + throw new IOException( + "Unable to obtain lock for keep alive of transaction. 
" + + transaction.toString()); + } else { + String tableName = transaction.getTableName(); + List colFamilies = transaction.getColumnFamilies(); + FamilyRevision revisionData = transaction.getFamilyRevisionInfo(); + for (String cfamily : colFamilies) { + String path = PathUtil.getRunningTxnInfoPath( + baseDir, tableName, cfamily); + zkUtil.updateData(path, revisionData, + ZKUtil.UpdateMode.KEEP_ALIVE); + } + + } + } catch (KeeperException e) { + throw new IOException("Exception while obtaining lock.", e); + } catch (InterruptedException e) { + throw new IOException("Exception while obtaining lock.", e); + } finally { + wLock.unlock(); + } + + } + + /* This method allows the user to create latest snapshot of a + /* table. + /* @param tableName The table whose snapshot is being created. + /* @return TableSnapshot An instance of TableSnaphot + /* @throws IOException + * @see org.apache.hive.hcatalog.hbase.snapshot.RevsionManager#createSnapshot(java.lang.String) + */ + public TableSnapshot createSnapshot(String tableName) throws IOException { + refreshTransactionList(tableName); + long latestID = zkUtil.currentID(tableName); + HashMap cfMap = new HashMap(); + List columnFamilyNames = zkUtil.getColumnFamiliesOfTable(tableName); + + for (String cfName : columnFamilyNames) { + String cfPath = PathUtil.getRunningTxnInfoPath(baseDir, tableName, cfName); + List tranxList = zkUtil.getTransactionList(cfPath); + long version; + if (!tranxList.isEmpty()) { + Collections.sort(tranxList); + // get the smallest running Transaction ID + long runningVersion = tranxList.get(0).getRevision(); + version = runningVersion - 1; + } else { + version = latestID; + } + cfMap.put(cfName, version); + } + + TableSnapshot snapshot = new TableSnapshot(tableName, cfMap, latestID); + LOG.debug("Created snapshot For table: " + tableName + " snapshot: " + snapshot); + return snapshot; + } + + /* This method allows the user to create snapshot of a + /* table with a given revision number. 
+ /* @param tableName + /* @param revision + /* @return TableSnapshot + /* @throws IOException + * @see org.apache.hive.hcatalog.hbase.snapshot.RevsionManager#createSnapshot(java.lang.String, long) + */ + public TableSnapshot createSnapshot(String tableName, long revision) throws IOException { + + long currentID = zkUtil.currentID(tableName); + if (revision > currentID) { + throw new IOException( + "The revision specified in the snapshot is higher than the current revision of the table."); + } + refreshTransactionList(tableName); + HashMap cfMap = new HashMap(); + List columnFamilies = zkUtil.getColumnFamiliesOfTable(tableName); + + for (String cf : columnFamilies) { + cfMap.put(cf, revision); + } + + return new TableSnapshot(tableName, cfMap, revision); + } + + /** + * Get the list of in-progress Transactions for a column family + * @param table the table name + * @param columnFamily the column family name + * @return a list of in-progress WriteTransactions + * @throws java.io.IOException + */ + List getRunningTransactions(String table, + String columnFamily) throws IOException { + String path = PathUtil.getRunningTxnInfoPath(baseDir, table, + columnFamily); + return zkUtil.getTransactionList(path); + } + + @Override + public List getAbortedWriteTransactions(String table, + String columnFamily) throws IOException { + String path = PathUtil.getAbortInformationPath(baseDir, table, columnFamily); + return zkUtil.getTransactionList(path); + } + + private void refreshTransactionList(String tableName) throws IOException { + String lockPath = prepareLockNode(tableName); + WriteLock wLock = new WriteLock(zkUtil.getSession(), lockPath, + Ids.OPEN_ACL_UNSAFE); + RMLockListener myLockListener = new RMLockListener(); + wLock.setLockListener(myLockListener); + try { + boolean lockGrabbed = wLock.lock(); + if (lockGrabbed == false) { + //TO DO : Let this request queue up and try obtaining lock. 
+ throw new IOException( + "Unable to obtain lock while refreshing transactions of table " + + tableName + "."); + } else { + List cfPaths = zkUtil + .getColumnFamiliesOfTable(tableName); + for (String cf : cfPaths) { + String runningDataPath = PathUtil.getRunningTxnInfoPath( + baseDir, tableName, cf); + zkUtil.refreshTransactions(runningDataPath); + } + + } + } catch (KeeperException e) { + throw new IOException("Exception while obtaining lock.", e); + } catch (InterruptedException e) { + throw new IOException("Exception while obtaining lock.", e); + } finally { + wLock.unlock(); + } + + } + + private String prepareLockNode(String tableName) throws IOException { + String txnDataPath = PathUtil.getTxnDataPath(this.baseDir, tableName); + String lockPath = PathUtil.getLockManagementNode(txnDataPath); + zkUtil.ensurePathExists(lockPath, null, Ids.OPEN_ACL_UNSAFE, + CreateMode.PERSISTENT); + return lockPath; + } + + /* + * This class is a listener class for the locks used in revision management. + * TBD: Use the following class to signal that that the lock is actually + * been granted. + */ + class RMLockListener implements LockListener { + + /* + * @see org.apache.hive.hcatalog.hbase.snapshot.lock.LockListener#lockAcquired() + */ + @Override + public void lockAcquired() { + + } + + /* + * @see org.apache.hive.hcatalog.hbase.snapshot.lock.LockListener#lockReleased() + */ + @Override + public void lockReleased() { + + } + + } + + +} diff --git hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/snapshot/ZKUtil.java hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/snapshot/ZKUtil.java new file mode 100644 index 0000000..100dcf0 --- /dev/null +++ hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/snapshot/ZKUtil.java @@ -0,0 +1,525 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.hbase.snapshot; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hive.hcatalog.hbase.snapshot.transaction.thrift.StoreFamilyRevision; +import org.apache.hive.hcatalog.hbase.snapshot.transaction.thrift.StoreFamilyRevisionList; +import org.apache.thrift.TBase; +import org.apache.thrift.TDeserializer; +import org.apache.thrift.TSerializer; +import org.apache.thrift.protocol.TBinaryProtocol; +import org.apache.zookeeper.CreateMode; +import org.apache.zookeeper.KeeperException; +import org.apache.zookeeper.WatchedEvent; +import org.apache.zookeeper.Watcher; +import org.apache.zookeeper.ZooDefs.Ids; +import org.apache.zookeeper.ZooKeeper; +import org.apache.zookeeper.ZooKeeper.States; +import org.apache.zookeeper.data.ACL; +import org.apache.zookeeper.data.Stat; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +class ZKUtil { + + private int DEFAULT_SESSION_TIMEOUT = 1000000; + private ZooKeeper zkSession; + private String baseDir; + private String connectString; + private static final Logger LOG = LoggerFactory.getLogger(ZKUtil.class); + + static enum UpdateMode { + APPEND, REMOVE, KEEP_ALIVE + } + + ; +
+ ZKUtil(String connection, String baseDir) { + this.connectString = connection; + this.baseDir = baseDir; + } + + /** + * This method creates the znodes related to a table. + * + * @param table The name of the table. + * @param families The list of column families of the table. + * @throws IOException If any of the znodes cannot be created. + */ + void setUpZnodesForTable(String table, List families) + throws IOException { + + String transactionDataTablePath = PathUtil.getTxnDataPath(baseDir, table); + ensurePathExists(transactionDataTablePath, null, Ids.OPEN_ACL_UNSAFE, + CreateMode.PERSISTENT); + for (String cf : families) { + String runningDataPath = PathUtil.getRunningTxnInfoPath( + this.baseDir, table, cf); + ensurePathExists(runningDataPath, null, Ids.OPEN_ACL_UNSAFE, + CreateMode.PERSISTENT); + String abortDataPath = PathUtil.getAbortInformationPath( + this.baseDir, table, cf); + ensurePathExists(abortDataPath, null, Ids.OPEN_ACL_UNSAFE, + CreateMode.PERSISTENT); + } + + } + + /** + * This method ensures that a given path exists in zookeeper. Every component + * of the path that does not exist yet is created. + * + * @param path The path of znode that is required to exist. + * @param data The data to be associated with each znode that gets created. + * @param acl The ACLs used for each znode that gets created. + * @param flags The CreateMode for each znode that gets created. + * @throws IOException If checking or creating any component of the path fails. + */ + void ensurePathExists(String path, byte[] data, List acl, + CreateMode flags) throws IOException { + String[] dirs = path.split("/"); + String parentPath = ""; + for (String subDir : dirs) { + if (subDir.equals("") == false) { + parentPath = parentPath + "/" + subDir; + try { + Stat stat = getSession().exists(parentPath, false); + if (stat == null) { + getSession().create(parentPath, data, acl, flags); + } + } catch (Exception e) { + throw new IOException("Exception while creating path " + + parentPath, e); + } + } + } + + } + + /** + * This method returns the column families of a table, i.e. the children of the + * table's transaction-data znode other than the idgen and lock nodes. + * + * @param tableName The name of table.
+ * @return List The list of column families in table. + * @throws IOException + */ + List getColumnFamiliesOfTable(String tableName) throws IOException { + String path = PathUtil.getTxnDataPath(baseDir, tableName); + List children = null; + List columnFamlies = new ArrayList(); + try { + children = getSession().getChildren(path, false); + } catch (KeeperException e) { + LOG.warn("Caught: ", e); + throw new IOException("Exception while obtaining columns of table.", e); + } catch (InterruptedException e) { + LOG.warn("Caught: ", e); + throw new IOException("Exception while obtaining columns of table.", e); + } + + for (String child : children) { + if ((child.contains("idgen") == false) + && (child.contains("_locknode_") == false)) { + columnFamlies.add(child); + } + } + return columnFamlies; + } + + /** + * This method returns a time stamp for use by the transactions. + * + * @return long The modification time of the clock znode after rewriting it. + * @throws IOException + */ + long getTimeStamp() throws IOException { + long timeStamp; + Stat stat; + String clockPath = PathUtil.getClockPath(this.baseDir); + ensurePathExists(clockPath, null, Ids.OPEN_ACL_UNSAFE, + CreateMode.PERSISTENT); + try { + getSession().exists(clockPath, false); + stat = getSession().setData(clockPath, null, -1); + + } catch (KeeperException e) { + LOG.warn("Caught: ", e); + throw new IOException("Exception while obtaining timestamp ", e); + } catch (InterruptedException e) { + LOG.warn("Caught: ", e); + throw new IOException("Exception while obtaining timestamp ", e); + } + timeStamp = stat.getMtime(); + return timeStamp; + } + + /** + * This method returns the next revision number to be used for any + * transaction purposes. + * + * @param tableName The name of the table. + * @return revision number The revision number handed out by IDGenerator#obtainID().
+ * @throws IOException + */ + long nextId(String tableName) throws IOException { + String idNode = PathUtil.getRevisionIDNode(this.baseDir, tableName); + ensurePathExists(idNode, Bytes.toBytes("0"), Ids.OPEN_ACL_UNSAFE, + CreateMode.PERSISTENT); + String lockNode = PathUtil.getLockManagementNode(idNode); + ensurePathExists(lockNode, null, Ids.OPEN_ACL_UNSAFE, + CreateMode.PERSISTENT); + IDGenerator idf = new IDGenerator(getSession(), tableName, idNode); + long id = idf.obtainID(); + return id; + } + + /** + * Returns the latest used revision id of the table. + * + * @param tableName The name of the table. + * @return the long The revision number returned by IDGenerator#readID(). + * @throws IOException Signals that an I/O exception has occurred. + */ + long currentID(String tableName) throws IOException { + String idNode = PathUtil.getRevisionIDNode(this.baseDir, tableName); + ensurePathExists(idNode, Bytes.toBytes("0"), Ids.OPEN_ACL_UNSAFE, + CreateMode.PERSISTENT); + String lockNode = PathUtil.getLockManagementNode(idNode); + ensurePathExists(lockNode, null, Ids.OPEN_ACL_UNSAFE, + CreateMode.PERSISTENT); + IDGenerator idf = new IDGenerator(getSession(), tableName, idNode); + long id = idf.readID(); + return id; + } + + /** + * This method retrieves the list of transaction information associated + * with each column/column family of a table. + * + * @param path The znode path + * @return List of FamilyRevision The list of transactions in the given path.
+ * @throws IOException + */ + List getTransactionList(String path) + throws IOException { + + byte[] data = getRawData(path, new Stat()); + ArrayList wtxnList = new ArrayList(); + if (data == null) { + return wtxnList; + } + StoreFamilyRevisionList txnList = new StoreFamilyRevisionList(); + deserialize(txnList, data); + Iterator itr = txnList.getRevisionListIterator(); + + while (itr.hasNext()) { + StoreFamilyRevision wtxn = itr.next(); + wtxnList.add(new FamilyRevision(wtxn.getRevision(), wtxn + .getTimestamp())); + } + + return wtxnList; + } + + /** + * This method returns the data associated with the path in zookeeper. + * + * @param path The znode path + * @param stat The Stat object handed to getData for this znode. + * @return byte array The data stored in the znode. + * @throws IOException If reading the znode fails for any reason. + */ + byte[] getRawData(String path, Stat stat) throws IOException { + byte[] data = null; + try { + data = getSession().getData(path, false, stat); + } catch (Exception e) { + throw new IOException( + "Exception while obtaining raw data from zookeeper path " + + path, e); + } + return data; + } + + /** + * This method creates the basic znodes in zookeeper for revision + * management. + * + * @throws IOException + */ + void createRootZNodes() throws IOException { + String txnBaseNode = PathUtil.getTransactionBasePath(this.baseDir); + String clockNode = PathUtil.getClockPath(this.baseDir); + ensurePathExists(txnBaseNode, null, Ids.OPEN_ACL_UNSAFE, + CreateMode.PERSISTENT); + ensurePathExists(clockNode, null, Ids.OPEN_ACL_UNSAFE, + CreateMode.PERSISTENT); + } + + /** + * This method closes the zookeeper session. + */ + void closeZKConnection() { + if (zkSession != null) { + try { + zkSession.close(); + } catch (InterruptedException e) { + LOG.warn("Close failed: ", e); + } + zkSession = null; + LOG.info("Disconnected to ZooKeeper"); + } + } + + /** + * This method returns a zookeeper session. If the current session is closed, + * then a new session is created.
+ * + * @return ZooKeeper An instance of zookeeper client; a newly created session is polled once a second until it leaves CONNECTING. + * @throws IOException + */ + ZooKeeper getSession() throws IOException { + if (zkSession == null || zkSession.getState() == States.CLOSED) { + synchronized (this) { + if (zkSession == null || zkSession.getState() == States.CLOSED) { + zkSession = new ZooKeeper(this.connectString, + this.DEFAULT_SESSION_TIMEOUT, new ZKWatcher()); + while (zkSession.getState() == States.CONNECTING) { + try { + Thread.sleep(1000); + } catch (InterruptedException e) { + } + } + } + } + } + return zkSession; + } + + /** + * This method updates the transaction data related to a znode. + * + * @param path The path to the transaction data. + * @param updateTx The FamilyRevision to be updated. + * @param mode The update mode: APPEND, REMOVE or KEEP_ALIVE. + * @throws IOException + */ + void updateData(String path, FamilyRevision updateTx, UpdateMode mode) + throws IOException { + + if (updateTx == null) { + throw new IOException( + "The transaction to be updated found to be null."); + } + List currentData = getTransactionList(path); + List newData = new ArrayList(); + boolean dataFound = false; + long updateVersion = updateTx.getRevision(); + for (FamilyRevision tranx : currentData) { + if (tranx.getRevision() != updateVersion) { + newData.add(tranx); + } else { + dataFound = true; + } + } + switch (mode) { + case REMOVE: + if (dataFound == false) { + throw new IOException( + "The transaction to be removed not found in the data."); + } + LOG.info("Removed trasaction : " + updateTx.toString()); + break; + case KEEP_ALIVE: + if (dataFound == false) { + throw new IOException( + "The transaction to be kept alove not found in the data.
It might have been expired."); + } + newData.add(updateTx); + LOG.info("keep alive of transaction : " + updateTx.toString()); + break; + case APPEND: + if (dataFound == true) { + throw new IOException( + "The data to be appended already exists."); + } + newData.add(updateTx); + LOG.info("Added transaction : " + updateTx.toString()); + break; + } + + // For serialization purposes. + List newTxnList = new ArrayList(); + for (FamilyRevision wtxn : newData) { + StoreFamilyRevision newTxn = new StoreFamilyRevision(wtxn.getRevision(), + wtxn.getExpireTimestamp()); + newTxnList.add(newTxn); + } + StoreFamilyRevisionList wtxnList = new StoreFamilyRevisionList(newTxnList); + byte[] newByteData = serialize(wtxnList); + + Stat stat = null; + try { + stat = zkSession.setData(path, newByteData, -1); + } catch (KeeperException e) { + throw new IOException( + "Exception while updating trasactional data. ", e); + } catch (InterruptedException e) { + throw new IOException( + "Exception while updating trasactional data. ", e); + } + + if (stat != null) { + LOG.info("Transaction list stored at " + path + "."); + } + + } + + /** + * Refresh transactions on a given transaction data path. + * + * @param path The path to the transaction data. + * @throws IOException Signals that an I/O exception has occurred.
+ */ + void refreshTransactions(String path) throws IOException { + List currentData = getTransactionList(path); + List newData = new ArrayList(); + + for (FamilyRevision tranx : currentData) { + if (tranx.getExpireTimestamp() > getTimeStamp()) { + newData.add(tranx); + } + } + + if (newData.equals(currentData) == false) { + List newTxnList = new ArrayList(); + for (FamilyRevision wtxn : newData) { + StoreFamilyRevision newTxn = new StoreFamilyRevision(wtxn.getRevision(), + wtxn.getExpireTimestamp()); + newTxnList.add(newTxn); + } + StoreFamilyRevisionList wtxnList = new StoreFamilyRevisionList(newTxnList); + byte[] newByteData = serialize(wtxnList); + + try { + zkSession.setData(path, newByteData, -1); + } catch (KeeperException e) { + throw new IOException( + "Exception while updating trasactional data. ", e); + } catch (InterruptedException e) { + throw new IOException( + "Exception while updating trasactional data. ", e); + } + + } + + } + + /** + * Delete the transaction-data znode of a table together with all of its children. + * + * @param tableName the hbase table name + * @throws IOException Signals that an I/O exception has occurred. + */ + void deleteZNodes(String tableName) throws IOException { + String transactionDataTablePath = PathUtil.getTxnDataPath(baseDir, + tableName); + deleteRecursively(transactionDataTablePath); + } + + void deleteRecursively(String path) throws IOException { + try { + List children = getSession().getChildren(path, false); + if (children.size() != 0) { + for (String child : children) { + deleteRecursively(path + "/" + child); + } + } + getSession().delete(path, -1); + } catch (KeeperException e) { + throw new IOException( + "Exception while deleting path " + path + ".", e); + } catch (InterruptedException e) { + throw new IOException( + "Exception while deleting path " + path + ".", e); + } + } + + /** + * This method serializes a given instance of TBase object. + * + * @param obj An instance of TBase + * @return byte array The serialized data (an empty array when obj is null).
+ * @throws IOException + */ + static byte[] serialize(TBase obj) throws IOException { + if (obj == null) + return new byte[0]; + try { + TSerializer serializer = new TSerializer( + new TBinaryProtocol.Factory()); + byte[] bytes = serializer.serialize(obj); + return bytes; + } catch (Exception e) { + throw new IOException("Serialization error: ", e); + } + } + + + /** + * This method deserializes the given byte array into the TBase object. + * + * @param obj The TBase instance that receives the deserialized content. + * @param data The serialized bytes to read; null or empty leaves obj untouched. + * @throws IOException + */ + static void deserialize(TBase obj, byte[] data) throws IOException { + if (data == null || data.length == 0) + return; + try { + TDeserializer deserializer = new TDeserializer( + new TBinaryProtocol.Factory()); + deserializer.deserialize(obj, data); + } catch (Exception e) { + throw new IOException("Deserialization error: " + e.getMessage(), e); + } + } + + private class ZKWatcher implements Watcher { + public void process(WatchedEvent event) { + switch (event.getState()) { + case Expired: + LOG.info("The client session has expired. Try opening a new " + + "session and connecting again."); + zkSession = null; + break; + default: + + } + } + } + +} diff --git hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/snapshot/lock/LockListener.java hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/snapshot/lock/LockListener.java new file mode 100644 index 0000000..f53ef97 --- /dev/null +++ hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/snapshot/lock/LockListener.java @@ -0,0 +1,41 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.hbase.snapshot.lock; + +/** + * This interface has two callback methods: + * one invoked when a lock is acquired and + * one invoked when the lock is released. + * This interface has been used as-is from the zookeeper 3.3.4 recipes, with minor changes + * in the package name. + */ +public interface LockListener { + /** + * Callback invoked when the lock + * is acquired. + */ + public void lockAcquired(); + + /** + * Callback invoked when the lock is + * released. + */ + public void lockReleased(); +} diff --git hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/snapshot/lock/ProtocolSupport.java hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/snapshot/lock/ProtocolSupport.java new file mode 100644 index 0000000..818f36a --- /dev/null +++ hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/snapshot/lock/ProtocolSupport.java @@ -0,0 +1,195 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.hbase.snapshot.lock; + +import org.apache.zookeeper.CreateMode; +import org.apache.zookeeper.KeeperException; +import org.apache.zookeeper.ZooDefs; +import org.apache.zookeeper.ZooKeeper; +import org.apache.zookeeper.data.ACL; +import org.apache.zookeeper.data.Stat; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.List; +import java.util.concurrent.atomic.AtomicBoolean; + +/** + * A base class for protocol implementations which provides a number of higher + * level helper methods for working with ZooKeeper along with retrying synchronous + * operations if the connection to ZooKeeper closes such as + * {@link #retryOperation(ZooKeeperOperation)} + * This class has been used as-is from the zookeeper 3.4.0 recipes with + * changes in the retry delay, retry count values and package name. 
+ */ +class ProtocolSupport { + private static final Logger LOG = LoggerFactory.getLogger(ProtocolSupport.class); + + protected final ZooKeeper zookeeper; + private AtomicBoolean closed = new AtomicBoolean(false); + private long retryDelay = 500L; + private int retryCount = 3; + private List acl = ZooDefs.Ids.OPEN_ACL_UNSAFE; + + public ProtocolSupport(ZooKeeper zookeeper) { + this.zookeeper = zookeeper; + } + + /** + * Closes this strategy (at most once) via doClose(); but keeps the + * ZooKeeper instance open + */ + public void close() { + if (closed.compareAndSet(false, true)) { + doClose(); + } + } + + /** + * return zookeeper client instance + * @return zookeeper client instance + */ + public ZooKeeper getZookeeper() { + return zookeeper; + } + + /** + * return the acl it is using + * @return the acl. + */ + public List getAcl() { + return acl; + } + + /** + * set the acl + * @param acl the acl to set to + */ + public void setAcl(List acl) { + this.acl = acl; + } + + /** + * get the retry delay in milliseconds + * @return the retry delay + */ + public long getRetryDelay() { + return retryDelay; + } + + /** + * Sets the base delay, in milliseconds, waited between retries + * @param retryDelay the retry delay + */ + public void setRetryDelay(long retryDelay) { + this.retryDelay = retryDelay; + } + + /** + * Allow derived classes to perform + * some custom closing operations to release resources + */ + protected void doClose() { + } + + + /** + * Perform the given operation, retrying up to retryCount times on connection loss + * @return object. it needs to be cast to the caller's expected + * return type.
+ */ + protected Object retryOperation(ZooKeeperOperation operation) + throws KeeperException, InterruptedException { + KeeperException exception = null; + for (int i = 0; i < retryCount; i++) { + try { + return operation.execute(); + } catch (KeeperException.SessionExpiredException e) { + LOG.warn("Session expired for: " + zookeeper + " so reconnecting due to: " + e, e); + throw e; + } catch (KeeperException.ConnectionLossException e) { + if (exception == null) { + exception = e; + } + LOG.debug("Attempt " + i + " failed with connection loss so " + + "attempting to reconnect: " + e, e); + retryDelay(i); + } + } + throw exception; + } + + /** + * Ensures that the given path exists with no data, the current + * ACL and CreateMode.PERSISTENT + * @param path the znode path to create if it is missing + */ + protected void ensurePathExists(String path) { + ensureExists(path, null, acl, CreateMode.PERSISTENT); + } + + /** + * Ensures that the given path exists with the given data, ACL and flags; failures are only logged + * @param path the znode path to create if it is missing + * @param acl the ACL used if the znode is created + * @param flags the CreateMode used if the znode is created + */ + protected void ensureExists(final String path, final byte[] data, + final List acl, final CreateMode flags) { + try { + retryOperation(new ZooKeeperOperation() { + public boolean execute() throws KeeperException, InterruptedException { + Stat stat = zookeeper.exists(path, false); + if (stat != null) { + return true; + } + zookeeper.create(path, data, acl, flags); + return true; + } + }); + } catch (KeeperException e) { + LOG.warn("Caught: " + e, e); + } catch (InterruptedException e) { + LOG.warn("Caught: " + e, e); + } + } + + /** + * Returns true if this protocol has been closed + * @return true if this protocol is closed + */ + protected boolean isClosed() { + return closed.get(); + } + + /** + * Sleeps for attemptCount * retryDelay milliseconds if this is not the first attempt + * @param attemptCount the number of the attempts performed so far + */ + protected void retryDelay(int attemptCount) { + if (attemptCount > 0) { + try { + Thread.sleep(attemptCount * retryDelay); + } catch (InterruptedException
e) { + LOG.debug("Failed to sleep: " + e, e); + } + } + } +} diff --git hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/snapshot/lock/WriteLock.java hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/snapshot/lock/WriteLock.java new file mode 100644 index 0000000..4a66ca5 --- /dev/null +++ hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/snapshot/lock/WriteLock.java @@ -0,0 +1,303 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.hbase.snapshot.lock; + +import org.apache.zookeeper.KeeperException; +import org.apache.zookeeper.WatchedEvent; +import org.apache.zookeeper.Watcher; + +import static org.apache.zookeeper.CreateMode.EPHEMERAL_SEQUENTIAL; + +import org.apache.zookeeper.ZooKeeper; +import org.apache.zookeeper.data.ACL; +import org.apache.zookeeper.data.Stat; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.List; +import java.util.SortedSet; +import java.util.TreeSet; + +/** + * A protocol to implement an exclusive + * write lock or to elect a leader.

You invoke {@link #lock()} to + * start the process of grabbing the lock; you may get the lock then or it may be + * some time later.

You can register a listener so that you are invoked + * when you get the lock; otherwise you can ask if you have the lock + * by calling {@link #isOwner()} + * This class has been used as-is from the zookeeper 3.4.0 recipes. The only change + * made is a TODO for sorting using suffixes and the package name. + */ +public class WriteLock extends ProtocolSupport { + private static final Logger LOG = LoggerFactory.getLogger(WriteLock.class); + + private final String dir; + private String id; + private ZNodeName idName; + private String ownerId; + private String lastChildId; + private byte[] data = {0x12, 0x34}; + private LockListener callback; + private LockZooKeeperOperation zop; + + /** + * zookeeper contructor for writelock + * @param zookeeper zookeeper client instance + * @param dir the parent path you want to use for locking + * @param acl the acls that you want to use for all the paths, + * if null world read/write is used. + */ + public WriteLock(ZooKeeper zookeeper, String dir, List acl) { + super(zookeeper); + this.dir = dir; + if (acl != null) { + setAcl(acl); + } + this.zop = new LockZooKeeperOperation(); + } + + /** + * zookeeper contructor for writelock with callback + * @param zookeeper the zookeeper client instance + * @param dir the parent path you want to use for locking + * @param acl the acls that you want to use for all the paths + * @param callback the call back instance + */ + public WriteLock(ZooKeeper zookeeper, String dir, List acl, + LockListener callback) { + this(zookeeper, dir, acl); + this.callback = callback; + } + + /** + * return the current locklistener + * @return the locklistener + */ + public LockListener getLockListener() { + return this.callback; + } + + /** + * register a different call back listener + * @param callback the call back instance + */ + public void setLockListener(LockListener callback) { + this.callback = callback; + } + + /** + * Removes the lock or associated znode if + * you no longer require the lock. 
this also + * removes your request in the queue for locking + * in case you do not already hold the lock. + * @throws RuntimeException throws a runtime exception + * if it cannot connect to zookeeper. + */ + public synchronized void unlock() throws RuntimeException { + + if (!isClosed() && id != null) { + // we don't need to retry this operation in the case of failure + // as ZK will remove ephemeral files and we don't wanna hang + // this process when closing if we cannot reconnect to ZK + try { + + ZooKeeperOperation zopdel = new ZooKeeperOperation() { + public boolean execute() throws KeeperException, + InterruptedException { + zookeeper.delete(id, -1); + return Boolean.TRUE; + } + }; + zopdel.execute(); + } catch (InterruptedException e) { + LOG.warn("Caught: " + e, e); + //set that we have been interrupted. + Thread.currentThread().interrupt(); + } catch (KeeperException.NoNodeException e) { + // do nothing + } catch (KeeperException e) { + LOG.warn("Caught: " + e, e); + throw (RuntimeException) new RuntimeException(e.getMessage()). + initCause(e); + } finally { + if (callback != null) { + callback.lockReleased(); + } + id = null; + } + } + } + + /** + * the watcher called on + * getting watch while watching + * my predecessor + */ + private class LockWatcher implements Watcher { + public void process(WatchedEvent event) { + // lets either become the leader or watch the new/updated node + LOG.debug("Watcher fired on path: " + event.getPath() + " state: " + + event.getState() + " type " + event.getType()); + try { + lock(); + } catch (Exception e) { + LOG.warn("Failed to acquire lock: " + e, e); + } + } + } + + /** + * a zoookeeper operation that is mainly responsible + * for all the magic required for locking. 
+ */ + private class LockZooKeeperOperation implements ZooKeeperOperation { + + /** find if we have been created earler if not create our node + * + * @param prefix the prefix node + * @param zookeeper teh zookeeper client + * @param dir the dir paretn + * @throws KeeperException + * @throws InterruptedException + */ + private void findPrefixInChildren(String prefix, ZooKeeper zookeeper, String dir) + throws KeeperException, InterruptedException { + List names = zookeeper.getChildren(dir, false); + for (String name : names) { + if (name.startsWith(prefix)) { + id = name; + if (LOG.isDebugEnabled()) { + LOG.debug("Found id created last time: " + id); + } + break; + } + } + if (id == null) { + id = zookeeper.create(dir + "/" + prefix, data, + getAcl(), EPHEMERAL_SEQUENTIAL); + + if (LOG.isDebugEnabled()) { + LOG.debug("Created id: " + id); + } + } + + } + + /** + * the command that is run and retried for actually + * obtaining the lock + * @return if the command was successful or not + */ + public boolean execute() throws KeeperException, InterruptedException { + do { + if (id == null) { + long sessionId = zookeeper.getSessionId(); + String prefix = "x-" + sessionId + "-"; + // lets try look up the current ID if we failed + // in the middle of creating the znode + findPrefixInChildren(prefix, zookeeper, dir); + idName = new ZNodeName(id); + } + if (id != null) { + List names = zookeeper.getChildren(dir, false); + if (names.isEmpty()) { + LOG.warn("No children in: " + dir + " when we've just " + + "created one! Lets recreate it..."); + // lets force the recreation of the id + id = null; + } else { + // lets sort them explicitly (though they do seem to come back in order ususally :) + SortedSet sortedNames = new TreeSet(); + for (String name : names) { + //TODO: Just use the suffix to sort. 
+ sortedNames.add(new ZNodeName(dir + "/" + name)); + } + ownerId = sortedNames.first().getName(); + SortedSet lessThanMe = sortedNames.headSet(idName); + if (!lessThanMe.isEmpty()) { + ZNodeName lastChildName = lessThanMe.last(); + lastChildId = lastChildName.getName(); + if (LOG.isDebugEnabled()) { + LOG.debug("watching less than me node: " + lastChildId); + } + Stat stat = zookeeper.exists(lastChildId, new LockWatcher()); + if (stat != null) { + return Boolean.FALSE; + } else { + LOG.warn("Could not find the" + + " stats for less than me: " + lastChildName.getName()); + } + } else { + if (isOwner()) { + if (callback != null) { + callback.lockAcquired(); + } + return Boolean.TRUE; + } + } + } + } + } + while (id == null); + return Boolean.FALSE; + } + } + + ; + + /** + * Attempts to acquire the exclusive write lock returning whether or not it was + * acquired. Note that the exclusive lock may be acquired some time later after + * this method has been invoked due to the current lock owner going away. + */ + public synchronized boolean lock() throws KeeperException, InterruptedException { + if (isClosed()) { + return false; + } + ensurePathExists(dir); + + return (Boolean) retryOperation(zop); + } + + /** + * return the parent dir for lock + * @return the parent dir used for locks. 
+ */ + public String getDir() { + return dir; + } + + /** + * Returns true if this node is the owner of the + * lock (or the leader) + */ + public boolean isOwner() { + return id != null && ownerId != null && id.equals(ownerId); + } + + /** + * return the id for this lock + * @return the id for this lock + */ + public String getId() { + return this.id; + } +} + diff --git hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/snapshot/lock/ZNodeName.java hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/snapshot/lock/ZNodeName.java new file mode 100644 index 0000000..9eeef3f --- /dev/null +++ hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/snapshot/lock/ZNodeName.java @@ -0,0 +1,113 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.hbase.snapshot.lock; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Represents an ephemeral znode name which has an ordered sequence number + * and can be sorted in order + * This class has been used as-is from the zookeeper 3.4.0 recipes with a + * change in package name. 
+ */ +public class ZNodeName implements Comparable { + private final String name; + private String prefix; + private int sequence = -1; + private static final Logger LOG = LoggerFactory.getLogger(ZNodeName.class); + + public ZNodeName(String name) { + if (name == null) { + throw new NullPointerException("id cannot be null"); + } + this.name = name; + this.prefix = name; + int idx = name.lastIndexOf('-'); + if (idx >= 0) { + this.prefix = name.substring(0, idx); + try { + this.sequence = Integer.parseInt(name.substring(idx + 1)); + // If an exception occurred we misdetected a sequence suffix, + // so return -1. + } catch (NumberFormatException e) { + LOG.info("Number format exception for " + idx, e); + } catch (ArrayIndexOutOfBoundsException e) { + LOG.info("Array out of bounds for " + idx, e); + } + } + } + + @Override + public String toString() { + return name.toString(); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + ZNodeName sequence = (ZNodeName) o; + + if (!name.equals(sequence.name)) return false; + + return true; + } + + @Override + public int hashCode() { + return name.hashCode() + 37; + } + + public int compareTo(ZNodeName that) { + int answer = this.prefix.compareTo(that.prefix); + if (answer == 0) { + int s1 = this.sequence; + int s2 = that.sequence; + if (s1 == -1 && s2 == -1) { + return this.name.compareTo(that.name); + } + answer = s1 == -1 ? 1 : s2 == -1 ? 
-1 : s1 - s2; + } + return answer; + } + + /** + * Returns the name of the znode + */ + public String getName() { + return name; + } + + /** + * Returns the sequence number + */ + public int getZNodeName() { + return sequence; + } + + /** + * Returns the text prefix before the sequence number + */ + public String getPrefix() { + return prefix; + } +} diff --git hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/snapshot/lock/ZooKeeperOperation.java hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/snapshot/lock/ZooKeeperOperation.java new file mode 100644 index 0000000..fc77994 --- /dev/null +++ hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/snapshot/lock/ZooKeeperOperation.java @@ -0,0 +1,41 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.hbase.snapshot.lock; + +import org.apache.zookeeper.KeeperException; + +/** + * A callback object which can be used for implementing retry-able operations in the + * {@link org.apache.hive.hcatalog.hbase.snapshot.lock.ProtocolSupport} class + * This class has been used as-is from the zookeeper 3.4.0 with change in the + * package name . 
+ */ +public interface ZooKeeperOperation { + + /** + * Performs the operation - which may be involved multiple times if the connection + * to ZooKeeper closes during this operation + * + * @return the result of the operation or null + * @throws KeeperException + * @throws InterruptedException + */ + public boolean execute() throws KeeperException, InterruptedException; +} diff --git hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/snapshot/package-info.java hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/snapshot/package-info.java new file mode 100644 index 0000000..24c9e2d --- /dev/null +++ hcatalog/storage-handlers/hbase/src/java/org/apache/hive/hcatalog/hbase/snapshot/package-info.java @@ -0,0 +1,28 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/** + * Provides a revision manager for data stored in HBase that can be used to implement repeatable reads. + * The component is designed to be usable for revision management of data stored in HBase in general, + * independent and not limited to HCatalog. It is used by the HCatalog HBase storage handler, implementation depends on HBase 0.92+. + *

+ * For more information please see + * Snapshots and Repeatable reads for HBase Tables. + * @since 0.4 + */ +package org.apache.hive.hcatalog.hbase.snapshot; diff --git hcatalog/storage-handlers/hbase/src/resources/revision-manager-default.xml hcatalog/storage-handlers/hbase/src/resources/revision-manager-default.xml index f5eca58..316a4a1 100644 --- hcatalog/storage-handlers/hbase/src/resources/revision-manager-default.xml +++ hcatalog/storage-handlers/hbase/src/resources/revision-manager-default.xml @@ -23,14 +23,14 @@ revision.manager.impl.class - org.apache.hcatalog.hbase.snapshot.ZKBasedRevisionManager + org.apache.hive.hcatalog.hbase.snapshot.ZKBasedRevisionManager Which revision manager implementation to use. revision.manager.endpoint.impl.class - org.apache.hcatalog.hbase.snapshot.ZKBasedRevisionManager + org.apache.hive.hcatalog.hbase.snapshot.ZKBasedRevisionManager diff --git hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/ManyMiniCluster.java hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/ManyMiniCluster.java deleted file mode 100644 index 8557cda..0000000 --- hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/ManyMiniCluster.java +++ /dev/null @@ -1,370 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.hbase; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.FileUtil; -import org.apache.hadoop.hbase.HBaseConfiguration; -import org.apache.hadoop.hbase.HConstants; -import org.apache.hadoop.hbase.MiniHBaseCluster; -import org.apache.hadoop.hbase.client.HConnectionManager; -import org.apache.hadoop.hbase.client.HTable; -import org.apache.hadoop.hbase.zookeeper.MiniZooKeeperCluster; -import org.apache.hadoop.hdfs.MiniDFSCluster; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; -import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapred.MiniMRCluster; - -import java.io.File; -import java.io.IOException; -import java.net.ServerSocket; - -/** - * MiniCluster class composed of a number of Hadoop Minicluster implementations - * and other necessary daemons needed for testing (HBase, Hive MetaStore, Zookeeper, MiniMRCluster) - */ -public class ManyMiniCluster { - - //MR stuff - private boolean miniMRClusterEnabled; - private MiniMRCluster mrCluster; - private int numTaskTrackers; - private JobConf jobConf; - - //HBase stuff - private boolean miniHBaseClusterEnabled; - private MiniHBaseCluster hbaseCluster; - private String hbaseRoot; - private Configuration hbaseConf; - private String hbaseDir; - - //ZK Stuff - private boolean miniZookeeperClusterEnabled; - private MiniZooKeeperCluster zookeeperCluster; - private int zookeeperPort; - private String zookeeperDir; - - //DFS Stuff - private MiniDFSCluster dfsCluster; - - //Hive Stuff - private boolean miniHiveMetastoreEnabled; - private HiveConf hiveConf; - private HiveMetaStoreClient hiveMetaStoreClient; - - private final File workDir; - private boolean started = false; - - - /** - * create a cluster instance using a builder which will expose 
configurable options - * @param workDir working directory ManyMiniCluster will use for all of it's *Minicluster instances - * @return a Builder instance - */ - public static Builder create(File workDir) { - return new Builder(workDir); - } - - private ManyMiniCluster(Builder b) { - workDir = b.workDir; - numTaskTrackers = b.numTaskTrackers; - hiveConf = b.hiveConf; - jobConf = b.jobConf; - hbaseConf = b.hbaseConf; - miniMRClusterEnabled = b.miniMRClusterEnabled; - miniHBaseClusterEnabled = b.miniHBaseClusterEnabled; - miniHiveMetastoreEnabled = b.miniHiveMetastoreEnabled; - miniZookeeperClusterEnabled = b.miniZookeeperClusterEnabled; - } - - protected synchronized void start() { - try { - if (!started) { - FileUtil.fullyDelete(workDir); - if (miniMRClusterEnabled) { - setupMRCluster(); - } - if (miniZookeeperClusterEnabled || miniHBaseClusterEnabled) { - miniZookeeperClusterEnabled = true; - setupZookeeper(); - } - if (miniHBaseClusterEnabled) { - setupHBaseCluster(); - } - if (miniHiveMetastoreEnabled) { - setUpMetastore(); - } - } - } catch (Exception e) { - throw new IllegalStateException("Failed to setup cluster", e); - } - } - - protected synchronized void stop() { - if (hbaseCluster != null) { - HConnectionManager.deleteAllConnections(true); - try { - hbaseCluster.shutdown(); - } catch (Exception e) { - e.printStackTrace(); - } - hbaseCluster = null; - } - if (zookeeperCluster != null) { - try { - zookeeperCluster.shutdown(); - } catch (Exception e) { - e.printStackTrace(); - } - zookeeperCluster = null; - } - if (mrCluster != null) { - try { - mrCluster.shutdown(); - } catch (Exception e) { - e.printStackTrace(); - } - mrCluster = null; - } - if (dfsCluster != null) { - try { - dfsCluster.getFileSystem().close(); - dfsCluster.shutdown(); - } catch (Exception e) { - e.printStackTrace(); - } - dfsCluster = null; - } - try { - FileSystem.closeAll(); - } catch (IOException e) { - e.printStackTrace(); - } - started = false; - } - - /** - * @return Configuration 
of mini HBase cluster - */ - public Configuration getHBaseConf() { - return HBaseConfiguration.create(hbaseConf); - } - - /** - * @return Configuration of mini MR cluster - */ - public Configuration getJobConf() { - return new Configuration(jobConf); - } - - /** - * @return Configuration of Hive Metastore, this is a standalone not a daemon - */ - public HiveConf getHiveConf() { - return new HiveConf(hiveConf); - } - - /** - * @return Filesystem used by MiniMRCluster and MiniHBaseCluster - */ - public FileSystem getFileSystem() { - try { - return FileSystem.get(jobConf); - } catch (IOException e) { - throw new IllegalStateException("Failed to get FileSystem", e); - } - } - - /** - * @return Metastore client instance - */ - public HiveMetaStoreClient getHiveMetaStoreClient() { - return hiveMetaStoreClient; - } - - private void setupMRCluster() { - try { - final int jobTrackerPort = findFreePort(); - final int taskTrackerPort = findFreePort(); - - if (jobConf == null) - jobConf = new JobConf(); - - jobConf.setInt("mapred.submit.replication", 1); - jobConf.set("yarn.scheduler.capacity.root.queues", "default"); - jobConf.set("yarn.scheduler.capacity.root.default.capacity", "100"); - //conf.set("hadoop.job.history.location",new File(workDir).getAbsolutePath()+"/history"); - System.setProperty("hadoop.log.dir", new File(workDir, "/logs").getAbsolutePath()); - - mrCluster = new MiniMRCluster(jobTrackerPort, - taskTrackerPort, - numTaskTrackers, - getFileSystem().getUri().toString(), - numTaskTrackers, - null, - null, - null, - jobConf); - - jobConf = mrCluster.createJobConf(); - } catch (IOException e) { - throw new IllegalStateException("Failed to Setup MR Cluster", e); - } - } - - private void setupZookeeper() { - try { - zookeeperDir = new File(workDir, "zk").getAbsolutePath(); - zookeeperPort = findFreePort(); - zookeeperCluster = new MiniZooKeeperCluster(); - zookeeperCluster.setDefaultClientPort(zookeeperPort); - zookeeperCluster.startup(new File(zookeeperDir)); - } 
catch (Exception e) { - throw new IllegalStateException("Failed to Setup Zookeeper Cluster", e); - } - } - - private void setupHBaseCluster() { - final int numRegionServers = 1; - - try { - hbaseDir = new File(workDir, "hbase").toString(); - hbaseDir = hbaseDir.replaceAll("\\\\", "/"); - hbaseRoot = "file://" + hbaseDir; - - if (hbaseConf == null) - hbaseConf = HBaseConfiguration.create(); - - hbaseConf.set("hbase.rootdir", hbaseRoot); - hbaseConf.set("hbase.master", "local"); - hbaseConf.setInt(HConstants.ZOOKEEPER_CLIENT_PORT, zookeeperPort); - hbaseConf.set(HConstants.ZOOKEEPER_QUORUM, "127.0.0.1"); - hbaseConf.setInt("hbase.master.port", findFreePort()); - hbaseConf.setInt("hbase.master.info.port", -1); - hbaseConf.setInt("hbase.regionserver.port", findFreePort()); - hbaseConf.setInt("hbase.regionserver.info.port", -1); - - hbaseCluster = new MiniHBaseCluster(hbaseConf, numRegionServers); - hbaseConf.set("hbase.master", hbaseCluster.getMaster().getServerName().getHostAndPort()); - //opening the META table ensures that cluster is running - new HTable(hbaseConf, HConstants.META_TABLE_NAME); - } catch (Exception e) { - throw new IllegalStateException("Failed to setup HBase Cluster", e); - } - } - - private void setUpMetastore() throws Exception { - if (hiveConf == null) - hiveConf = new HiveConf(this.getClass()); - - //The default org.apache.hadoop.hive.ql.hooks.PreExecutePrinter hook - //is present only in the ql/test directory - hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); - hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); - hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); - hiveConf.set(HiveConf.ConfVars.METASTORECONNECTURLKEY.varname, - "jdbc:derby:" + new File(workDir + "/metastore_db") + ";create=true"); - hiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.toString(), - new File(workDir, "warehouse").toString()); - //set where derby logs - File derbyLogFile = new File(workDir + "/derby.log"); - 
derbyLogFile.createNewFile(); - System.setProperty("derby.stream.error.file", derbyLogFile.getPath()); - - -// Driver driver = new Driver(hiveConf); -// SessionState.start(new CliSessionState(hiveConf)); - - hiveMetaStoreClient = new HiveMetaStoreClient(hiveConf); - } - - private static int findFreePort() throws IOException { - ServerSocket server = new ServerSocket(0); - int port = server.getLocalPort(); - server.close(); - return port; - } - - public static class Builder { - private File workDir; - private int numTaskTrackers = 1; - private JobConf jobConf; - private Configuration hbaseConf; - private HiveConf hiveConf; - - private boolean miniMRClusterEnabled = true; - private boolean miniHBaseClusterEnabled = true; - private boolean miniHiveMetastoreEnabled = true; - private boolean miniZookeeperClusterEnabled = true; - - - private Builder(File workDir) { - this.workDir = workDir; - } - - public Builder numTaskTrackers(int num) { - numTaskTrackers = num; - return this; - } - - public Builder jobConf(JobConf jobConf) { - this.jobConf = jobConf; - return this; - } - - public Builder hbaseConf(Configuration hbaseConf) { - this.hbaseConf = hbaseConf; - return this; - } - - public Builder hiveConf(HiveConf hiveConf) { - this.hiveConf = hiveConf; - return this; - } - - public Builder miniMRClusterEnabled(boolean enabled) { - this.miniMRClusterEnabled = enabled; - return this; - } - - public Builder miniHBaseClusterEnabled(boolean enabled) { - this.miniHBaseClusterEnabled = enabled; - return this; - } - - public Builder miniZookeeperClusterEnabled(boolean enabled) { - this.miniZookeeperClusterEnabled = enabled; - return this; - } - - public Builder miniHiveMetastoreEnabled(boolean enabled) { - this.miniHiveMetastoreEnabled = enabled; - return this; - } - - - public ManyMiniCluster build() { - return new ManyMiniCluster(this); - } - - } -} diff --git hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/SkeletonHBaseTest.java 
hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/SkeletonHBaseTest.java deleted file mode 100644 index aefe450..0000000 --- hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/SkeletonHBaseTest.java +++ /dev/null @@ -1,237 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.hbase; - -import java.io.File; -import java.io.IOException; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Map; -import java.util.Random; -import java.util.Set; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hbase.HBaseConfiguration; -import org.apache.hadoop.hbase.HColumnDescriptor; -import org.apache.hadoop.hbase.HTableDescriptor; -import org.apache.hadoop.hbase.client.HBaseAdmin; -import org.apache.hadoop.hive.conf.HiveConf; -import org.junit.AfterClass; -import org.junit.BeforeClass; - -/** - * Base class for HBase Tests which need a mini cluster instance - */ -public abstract class SkeletonHBaseTest { - - protected static String TEST_DIR = "/tmp/build/test/data/"; - - protected final static String DEFAULT_CONTEXT_HANDLE = "default"; - - protected static Map contextMap = new HashMap(); - protected static Set tableNames = new HashSet(); - - /** - * Allow tests to alter the default MiniCluster configuration. 
- * (requires static initializer block as all setup here is static) - */ - protected static Configuration testConf = null; - - protected void createTable(String tableName, String[] families) { - try { - HBaseAdmin admin = new HBaseAdmin(getHbaseConf()); - HTableDescriptor tableDesc = new HTableDescriptor(tableName); - for (String family : families) { - HColumnDescriptor columnDescriptor = new HColumnDescriptor(family); - tableDesc.addFamily(columnDescriptor); - } - admin.createTable(tableDesc); - } catch (Exception e) { - e.printStackTrace(); - throw new IllegalStateException(e); - } - - } - - protected String newTableName(String prefix) { - String name = null; - int tries = 100; - do { - name = prefix + "_" + Math.abs(new Random().nextLong()); - } while (tableNames.contains(name) && --tries > 0); - if (tableNames.contains(name)) - throw new IllegalStateException("Couldn't find a unique table name, tableNames size: " + tableNames.size()); - tableNames.add(name); - return name; - } - - - /** - * startup an hbase cluster instance before a test suite runs - */ - @BeforeClass - public static void setup() { - if (!contextMap.containsKey(getContextHandle())) - contextMap.put(getContextHandle(), new Context(getContextHandle())); - - contextMap.get(getContextHandle()).start(); - } - - /** - * shutdown an hbase cluster instance ant the end of the test suite - */ - @AfterClass - public static void tearDown() { - contextMap.get(getContextHandle()).stop(); - } - - /** - * override this with a different context handle if tests suites are run simultaneously - * and ManyMiniCluster instances shouldn't be shared - * @return - */ - public static String getContextHandle() { - return DEFAULT_CONTEXT_HANDLE; - } - - /** - * @return working directory for a given test context, which normally is a test suite - */ - public String getTestDir() { - return contextMap.get(getContextHandle()).getTestDir(); - } - - /** - * @return ManyMiniCluster instance - */ - public ManyMiniCluster 
getCluster() { - return contextMap.get(getContextHandle()).getCluster(); - } - - /** - * @return configuration of MiniHBaseCluster - */ - public Configuration getHbaseConf() { - return contextMap.get(getContextHandle()).getHbaseConf(); - } - - /** - * @return configuration of MiniMRCluster - */ - public Configuration getJobConf() { - return contextMap.get(getContextHandle()).getJobConf(); - } - - /** - * @return configuration of Hive Metastore - */ - public HiveConf getHiveConf() { - return contextMap.get(getContextHandle()).getHiveConf(); - } - - /** - * @return filesystem used by ManyMiniCluster daemons - */ - public FileSystem getFileSystem() { - return contextMap.get(getContextHandle()).getFileSystem(); - } - - /** - * class used to encapsulate a context which is normally used by - * a single TestSuite or across TestSuites when multi-threaded testing is turned on - */ - public static class Context { - protected String testDir; - protected ManyMiniCluster cluster; - - protected Configuration hbaseConf; - protected Configuration jobConf; - protected HiveConf hiveConf; - - protected FileSystem fileSystem; - - protected int usageCount = 0; - - public Context(String handle) { - testDir = new File(TEST_DIR + "/test_" + handle + "_" + Math.abs(new Random().nextLong()) + "/").getPath(); - System.out.println("Cluster work directory: " + testDir); - } - - public void start() { - if (usageCount++ == 0) { - ManyMiniCluster.Builder b = ManyMiniCluster.create(new File(testDir)); - if (testConf != null) { - b.hbaseConf(HBaseConfiguration.create(testConf)); - } - cluster = b.build(); - cluster.start(); - this.hbaseConf = cluster.getHBaseConf(); - jobConf = cluster.getJobConf(); - fileSystem = cluster.getFileSystem(); - hiveConf = cluster.getHiveConf(); - } - } - - public void stop() { - if (--usageCount == 0) { - try { - cluster.stop(); - cluster = null; - } finally { - System.out.println("Trying to cleanup: " + testDir); - try { - FileSystem fs = FileSystem.get(jobConf); - 
fs.delete(new Path(testDir), true); - } catch (IOException e) { - throw new IllegalStateException("Failed to cleanup test dir", e); - } - - } - } - } - - public String getTestDir() { - return testDir; - } - - public ManyMiniCluster getCluster() { - return cluster; - } - - public Configuration getHbaseConf() { - return hbaseConf; - } - - public Configuration getJobConf() { - return jobConf; - } - - public HiveConf getHiveConf() { - return hiveConf; - } - - public FileSystem getFileSystem() { - return fileSystem; - } - } - -} diff --git hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/TestHBaseBulkOutputFormat.java hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/TestHBaseBulkOutputFormat.java deleted file mode 100644 index 54c65cd..0000000 --- hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/TestHBaseBulkOutputFormat.java +++ /dev/null @@ -1,631 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.hbase; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hbase.HBaseConfiguration; -import org.apache.hadoop.hbase.client.HTable; -import org.apache.hadoop.hbase.client.Put; -import org.apache.hadoop.hbase.client.Result; -import org.apache.hadoop.hbase.client.ResultScanner; -import org.apache.hadoop.hbase.client.Scan; -import org.apache.hadoop.hbase.io.ImmutableBytesWritable; -import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hadoop.hive.cli.CliSessionState; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hadoop.io.BytesWritable; -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.mapred.JobClient; -import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapred.OutputCollector; -import org.apache.hadoop.mapred.Reporter; -import org.apache.hadoop.mapred.RunningJob; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.Mapper; -import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; -import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; -import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; -import org.apache.hcatalog.cli.HCatDriver; -import org.apache.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzer; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.common.HCatUtil; -import org.apache.hcatalog.data.DefaultHCatRecord; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.apache.hcatalog.hbase.HBaseBulkOutputFormat.HBaseBulkOutputCommitter; -import org.apache.hcatalog.hbase.TestHBaseDirectOutputFormat.MapReadAbortedTransaction; -import 
org.apache.hcatalog.hbase.TestHBaseDirectOutputFormat.MapWriteAbortTransaction; -import org.apache.hcatalog.hbase.snapshot.FamilyRevision; -import org.apache.hcatalog.hbase.snapshot.RevisionManager; -import org.apache.hcatalog.hbase.snapshot.RevisionManagerConfiguration; -import org.apache.hcatalog.hbase.snapshot.TableSnapshot; -import org.apache.hcatalog.hbase.snapshot.Transaction; -import org.apache.hcatalog.mapreduce.HCatInputFormat; -import org.apache.hcatalog.mapreduce.HCatOutputFormat; -import org.apache.hcatalog.mapreduce.OutputJobInfo; - -import org.junit.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; -import java.util.Arrays; -import java.util.List; -import java.util.Map; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; - -/** - * Tests components of HBaseHCatStorageHandler using ManyMiniCluster. - * Including ImprtSequenceFile and HBaseBulkOutputFormat - */ -public class TestHBaseBulkOutputFormat extends SkeletonHBaseTest { - private final static Logger LOG = LoggerFactory.getLogger(TestHBaseBulkOutputFormat.class); - - private final HiveConf allConf; - private final HCatDriver hcatDriver; - - public TestHBaseBulkOutputFormat() { - allConf = getHiveConf(); - allConf.set(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname, - HCatSemanticAnalyzer.class.getName()); - allConf.set(HiveConf.ConfVars.HADOOPFS.varname, getFileSystem().getUri().toString()); - allConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, new Path(getTestDir(), "warehouse").toString()); - - //Add hbase properties - for (Map.Entry el : getHbaseConf()) - allConf.set(el.getKey(), el.getValue()); - for (Map.Entry el : getJobConf()) - allConf.set(el.getKey(), el.getValue()); - - HBaseConfiguration.merge( - allConf, - RevisionManagerConfiguration.create()); - SessionState.start(new CliSessionState(allConf)); - hcatDriver = new HCatDriver(); - } - - public 
static class MapWriteOldMapper implements org.apache.hadoop.mapred.Mapper { - - @Override - public void close() throws IOException { - } - - @Override - public void configure(JobConf job) { - } - - @Override - public void map(LongWritable key, Text value, - OutputCollector output, - Reporter reporter) throws IOException { - String vals[] = value.toString().split(","); - Put put = new Put(Bytes.toBytes(vals[0])); - for (int i = 1; i < vals.length; i++) { - String pair[] = vals[i].split(":"); - put.add(Bytes.toBytes("my_family"), - Bytes.toBytes(pair[0]), - Bytes.toBytes(pair[1])); - } - output.collect(new ImmutableBytesWritable(Bytes.toBytes(vals[0])), put); - } - - } - - public static class MapWrite extends Mapper { - - @Override - public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { - String vals[] = value.toString().split(","); - Put put = new Put(Bytes.toBytes(vals[0])); - for (int i = 1; i < vals.length; i++) { - String pair[] = vals[i].split(":"); - put.add(Bytes.toBytes("my_family"), - Bytes.toBytes(pair[0]), - Bytes.toBytes(pair[1])); - } - context.write(new ImmutableBytesWritable(Bytes.toBytes(vals[0])), put); - } - } - - public static class MapHCatWrite extends Mapper { - @Override - public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { - OutputJobInfo jobInfo = (OutputJobInfo) HCatUtil.deserialize(context.getConfiguration().get(HCatConstants.HCAT_KEY_OUTPUT_INFO)); - HCatRecord record = new DefaultHCatRecord(3); - HCatSchema schema = jobInfo.getOutputSchema(); - String vals[] = value.toString().split(","); - record.setInteger("key", schema, Integer.parseInt(vals[0])); - for (int i = 1; i < vals.length; i++) { - String pair[] = vals[i].split(":"); - record.set(pair[0], schema, pair[1]); - } - context.write(null, record); - } - } - - @Test - public void hbaseBulkOutputFormatTest() throws IOException, ClassNotFoundException, InterruptedException { - 
String testName = "hbaseBulkOutputFormatTest"; - Path methodTestDir = new Path(getTestDir(), testName); - LOG.info("starting: " + testName); - - String tableName = newTableName(testName).toLowerCase(); - String familyName = "my_family"; - byte[] familyNameBytes = Bytes.toBytes(familyName); - - //include hbase config in conf file - Configuration conf = new Configuration(allConf); - - //create table - conf.set(HBaseConstants.PROPERTY_OUTPUT_TABLE_NAME_KEY, tableName); - conf.set("yarn.scheduler.capacity.root.queues", "default"); - conf.set("yarn.scheduler.capacity.root.default.capacity", "100"); - createTable(tableName, new String[]{familyName}); - - String data[] = {"1,english:one,spanish:uno", - "2,english:two,spanish:dos", - "3,english:three,spanish:tres"}; - - - // input/output settings - Path inputPath = new Path(methodTestDir, "mr_input"); - FSDataOutputStream os = getFileSystem().create(new Path(inputPath, "inputFile.txt")); - for (String line : data) - os.write(Bytes.toBytes(line + "\n")); - os.close(); - Path interPath = new Path(methodTestDir, "inter"); - //create job - JobConf job = new JobConf(conf); - job.setWorkingDirectory(new Path(methodTestDir, "mr_work")); - job.setJarByClass(this.getClass()); - job.setMapperClass(MapWriteOldMapper.class); - - job.setInputFormat(org.apache.hadoop.mapred.TextInputFormat.class); - org.apache.hadoop.mapred.TextInputFormat.setInputPaths(job, inputPath); - - job.setOutputFormat(HBaseBulkOutputFormat.class); - org.apache.hadoop.mapred.SequenceFileOutputFormat.setOutputPath(job, interPath); - job.setOutputCommitter(HBaseBulkOutputCommitter.class); - - //manually create transaction - RevisionManager rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(conf); - try { - OutputJobInfo outputJobInfo = OutputJobInfo.create("default", tableName, null); - Transaction txn = rm.beginWriteTransaction(tableName, Arrays.asList(familyName)); - outputJobInfo.getProperties().setProperty(HBaseConstants.PROPERTY_WRITE_TXN_KEY, - 
HCatUtil.serialize(txn)); - job.set(HCatConstants.HCAT_KEY_OUTPUT_INFO, - HCatUtil.serialize(outputJobInfo)); - } finally { - rm.close(); - } - - job.setMapOutputKeyClass(ImmutableBytesWritable.class); - job.setMapOutputValueClass(HCatRecord.class); - - job.setOutputKeyClass(ImmutableBytesWritable.class); - job.setOutputValueClass(HCatRecord.class); - - job.setNumReduceTasks(0); - - RunningJob runJob = JobClient.runJob(job); - runJob.waitForCompletion(); - assertTrue(runJob.isSuccessful()); - - //verify - HTable table = new HTable(conf, tableName); - Scan scan = new Scan(); - scan.addFamily(familyNameBytes); - ResultScanner scanner = table.getScanner(scan); - int index = 0; - for (Result result : scanner) { - String vals[] = data[index].toString().split(","); - for (int i = 1; i < vals.length; i++) { - String pair[] = vals[i].split(":"); - assertTrue(result.containsColumn(familyNameBytes, Bytes.toBytes(pair[0]))); - assertEquals(pair[1], Bytes.toString(result.getValue(familyNameBytes, Bytes.toBytes(pair[0])))); - } - index++; - } - //test if load count is the same - assertEquals(data.length, index); - //test if scratch directory was erased - assertFalse(FileSystem.get(job).exists(interPath)); - } - - @Test - public void importSequenceFileTest() throws IOException, ClassNotFoundException, InterruptedException { - String testName = "importSequenceFileTest"; - Path methodTestDir = new Path(getTestDir(), testName); - LOG.info("starting: " + testName); - - String tableName = newTableName(testName).toLowerCase(); - String familyName = "my_family"; - byte[] familyNameBytes = Bytes.toBytes(familyName); - - //include hbase config in conf file - Configuration conf = new Configuration(allConf); - - //create table - createTable(tableName, new String[]{familyName}); - - String data[] = {"1,english:one,spanish:uno", - "2,english:two,spanish:dos", - "3,english:three,spanish:tres"}; - - - // input/output settings - Path inputPath = new Path(methodTestDir, "mr_input"); - 
getFileSystem().mkdirs(inputPath); - FSDataOutputStream os = getFileSystem().create(new Path(inputPath, "inputFile.txt")); - for (String line : data) - os.write(Bytes.toBytes(line + "\n")); - os.close(); - Path interPath = new Path(methodTestDir, "inter"); - Path scratchPath = new Path(methodTestDir, "scratch"); - - - //create job - Job job = new Job(conf, testName); - job.setWorkingDirectory(new Path(methodTestDir, "mr_work")); - job.setJarByClass(this.getClass()); - job.setMapperClass(MapWrite.class); - - job.setInputFormatClass(TextInputFormat.class); - TextInputFormat.setInputPaths(job, inputPath); - - job.setOutputFormatClass(SequenceFileOutputFormat.class); - SequenceFileOutputFormat.setOutputPath(job, interPath); - - job.setMapOutputKeyClass(ImmutableBytesWritable.class); - job.setMapOutputValueClass(Put.class); - - job.setOutputKeyClass(ImmutableBytesWritable.class); - job.setOutputValueClass(Put.class); - - job.setNumReduceTasks(0); - assertTrue(job.waitForCompletion(true)); - - job = new Job(new Configuration(allConf), testName + "_importer"); - assertTrue(ImportSequenceFile.runJob(job, tableName, interPath, scratchPath)); - - //verify - HTable table = new HTable(conf, tableName); - Scan scan = new Scan(); - scan.addFamily(familyNameBytes); - ResultScanner scanner = table.getScanner(scan); - int index = 0; - for (Result result : scanner) { - String vals[] = data[index].toString().split(","); - for (int i = 1; i < vals.length; i++) { - String pair[] = vals[i].split(":"); - assertTrue(result.containsColumn(familyNameBytes, Bytes.toBytes(pair[0]))); - assertEquals(pair[1], Bytes.toString(result.getValue(familyNameBytes, Bytes.toBytes(pair[0])))); - } - index++; - } - //test if load count is the same - assertEquals(data.length, index); - //test if scratch directory was erased - assertFalse(FileSystem.get(job.getConfiguration()).exists(scratchPath)); - } - - @Test - public void bulkModeHCatOutputFormatTest() throws Exception { - String testName = 
"bulkModeHCatOutputFormatTest"; - Path methodTestDir = new Path(getTestDir(), testName); - LOG.info("starting: " + testName); - - String databaseName = testName.toLowerCase(); - String dbDir = new Path(methodTestDir, "DB_" + testName).toString(); - String tableName = newTableName(testName).toLowerCase(); - String familyName = "my_family"; - byte[] familyNameBytes = Bytes.toBytes(familyName); - - - //include hbase config in conf file - Configuration conf = new Configuration(allConf); - conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, HCatUtil.serialize(allConf.getAllProperties())); - - - String dbquery = "CREATE DATABASE IF NOT EXISTS " + databaseName + " LOCATION '" + dbDir + "'"; - String tableQuery = "CREATE TABLE " + databaseName + "." + tableName + - "(key int, english string, spanish string) STORED BY " + - "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" + - "TBLPROPERTIES ('" + HBaseConstants.PROPERTY_BULK_OUTPUT_MODE_KEY + "'='true'," + - "'hbase.columns.mapping'=':key," + familyName + ":english," + familyName + ":spanish')"; - - assertEquals(0, hcatDriver.run(dbquery).getResponseCode()); - assertEquals(0, hcatDriver.run(tableQuery).getResponseCode()); - - String data[] = {"1,english:ONE,spanish:UNO", - "2,english:TWO,spanish:DOS", - "3,english:THREE,spanish:TRES"}; - - // input/output settings - Path inputPath = new Path(methodTestDir, "mr_input"); - getFileSystem().mkdirs(inputPath); - //create multiple files so we can test with multiple mappers - for (int i = 0; i < data.length; i++) { - FSDataOutputStream os = getFileSystem().create(new Path(inputPath, "inputFile" + i + ".txt")); - os.write(Bytes.toBytes(data[i] + "\n")); - os.close(); - } - - //create job - Job job = new Job(conf, testName); - job.setWorkingDirectory(new Path(methodTestDir, "mr_work")); - job.setJarByClass(this.getClass()); - job.setMapperClass(MapHCatWrite.class); - - job.setInputFormatClass(TextInputFormat.class); - TextInputFormat.setInputPaths(job, inputPath); - - - 
job.setOutputFormatClass(HCatOutputFormat.class); - OutputJobInfo outputJobInfo = OutputJobInfo.create(databaseName, tableName, null); - HCatOutputFormat.setOutput(job, outputJobInfo); - - job.setMapOutputKeyClass(BytesWritable.class); - job.setMapOutputValueClass(HCatRecord.class); - - job.setOutputKeyClass(BytesWritable.class); - job.setOutputValueClass(HCatRecord.class); - - job.setNumReduceTasks(0); - - assertTrue(job.waitForCompletion(true)); - RevisionManager rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(conf); - try { - TableSnapshot snapshot = rm.createSnapshot(databaseName + "." + tableName); - for (String el : snapshot.getColumnFamilies()) { - assertEquals(1, snapshot.getRevision(el)); - } - } finally { - rm.close(); - } - - //verify - HTable table = new HTable(conf, databaseName + "." + tableName); - Scan scan = new Scan(); - scan.addFamily(familyNameBytes); - ResultScanner scanner = table.getScanner(scan); - int index = 0; - for (Result result : scanner) { - String vals[] = data[index].toString().split(","); - for (int i = 1; i < vals.length; i++) { - String pair[] = vals[i].split(":"); - assertTrue(result.containsColumn(familyNameBytes, Bytes.toBytes(pair[0]))); - assertEquals(pair[1], Bytes.toString(result.getValue(familyNameBytes, Bytes.toBytes(pair[0])))); - assertEquals(1l, result.getColumn(familyNameBytes, Bytes.toBytes(pair[0])).get(0).getTimestamp()); - } - index++; - } - //test if load count is the same - assertEquals(data.length, index); - } - - @Test - public void bulkModeHCatOutputFormatTestWithDefaultDB() throws Exception { - String testName = "bulkModeHCatOutputFormatTestWithDefaultDB"; - Path methodTestDir = new Path(getTestDir(), testName); - - String databaseName = "default"; - String dbDir = new Path(methodTestDir, "DB_" + testName).toString(); - String tableName = newTableName(testName).toLowerCase(); - String familyName = "my_family"; - byte[] familyNameBytes = Bytes.toBytes(familyName); - - - //include hbase config in conf 
file - Configuration conf = new Configuration(allConf); - conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, HCatUtil.serialize(allConf.getAllProperties())); - - - String dbquery = "CREATE DATABASE IF NOT EXISTS " + databaseName + " LOCATION '" + dbDir + "'"; - String tableQuery = "CREATE TABLE " + databaseName + "." + tableName + - "(key int, english string, spanish string) STORED BY " + - "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" + - "TBLPROPERTIES ('" + HBaseConstants.PROPERTY_BULK_OUTPUT_MODE_KEY + "'='true'," + - "'hbase.columns.mapping'=':key," + familyName + ":english," + familyName + ":spanish')"; - - assertEquals(0, hcatDriver.run(dbquery).getResponseCode()); - assertEquals(0, hcatDriver.run(tableQuery).getResponseCode()); - - String data[] = {"1,english:ONE,spanish:UNO", - "2,english:TWO,spanish:DOS", - "3,english:THREE,spanish:TRES"}; - - // input/output settings - Path inputPath = new Path(methodTestDir, "mr_input"); - getFileSystem().mkdirs(inputPath); - FSDataOutputStream os = getFileSystem().create(new Path(inputPath, "inputFile.txt")); - for (String line : data) - os.write(Bytes.toBytes(line + "\n")); - os.close(); - - //create job - Job job = new Job(conf, testName); - job.setWorkingDirectory(new Path(methodTestDir, "mr_work")); - job.setJarByClass(this.getClass()); - job.setMapperClass(MapHCatWrite.class); - - job.setInputFormatClass(TextInputFormat.class); - TextInputFormat.setInputPaths(job, inputPath); - - - job.setOutputFormatClass(HCatOutputFormat.class); - OutputJobInfo outputJobInfo = OutputJobInfo.create(databaseName, tableName, null); - HCatOutputFormat.setOutput(job, outputJobInfo); - - job.setMapOutputKeyClass(BytesWritable.class); - job.setMapOutputValueClass(HCatRecord.class); - - job.setOutputKeyClass(BytesWritable.class); - job.setOutputValueClass(HCatRecord.class); - - job.setNumReduceTasks(0); - - assertTrue(job.waitForCompletion(true)); - - //verify - HTable table = new HTable(conf, tableName); - Scan scan = new Scan(); - 
scan.addFamily(familyNameBytes); - ResultScanner scanner = table.getScanner(scan); - int index = 0; - for (Result result : scanner) { - String vals[] = data[index].toString().split(","); - for (int i = 1; i < vals.length; i++) { - String pair[] = vals[i].split(":"); - assertTrue(result.containsColumn(familyNameBytes, Bytes.toBytes(pair[0]))); - assertEquals(pair[1], Bytes.toString(result.getValue(familyNameBytes, Bytes.toBytes(pair[0])))); - } - index++; - } - //test if load count is the same - assertEquals(data.length, index); - } - - @Test - public void bulkModeAbortTest() throws Exception { - String testName = "bulkModeAbortTest"; - Path methodTestDir = new Path(getTestDir(), testName); - String databaseName = testName.toLowerCase(); - String dbDir = new Path(methodTestDir, "DB_" + testName).toString(); - String tableName = newTableName(testName).toLowerCase(); - String familyName = "my_family"; - - // include hbase config in conf file - Configuration conf = new Configuration(allConf); - conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, HCatUtil.serialize(allConf.getAllProperties())); - - String dbquery = "CREATE DATABASE IF NOT EXISTS " + databaseName + " LOCATION '" + dbDir - + "'"; - String tableQuery = "CREATE TABLE " + databaseName + "." 
+ tableName + - "(key int, english string, spanish string) STORED BY " + - "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" + - "TBLPROPERTIES ('" + HBaseConstants.PROPERTY_BULK_OUTPUT_MODE_KEY + "'='true'," + - "'hbase.columns.mapping'=':key," + familyName + ":english," + familyName - + ":spanish')"; - - assertEquals(0, hcatDriver.run(dbquery).getResponseCode()); - assertEquals(0, hcatDriver.run(tableQuery).getResponseCode()); - - String data[] = {"1,english:ONE,spanish:UNO", - "2,english:TWO,spanish:DOS", - "3,english:THREE,spanish:TRES"}; - - Path inputPath = new Path(methodTestDir, "mr_input"); - getFileSystem().mkdirs(inputPath); - // create multiple files so we can test with multiple mappers - for (int i = 0; i < data.length; i++) { - FSDataOutputStream os = getFileSystem().create( - new Path(inputPath, "inputFile" + i + ".txt")); - os.write(Bytes.toBytes(data[i] + "\n")); - os.close(); - } - - Path workingDir = new Path(methodTestDir, "mr_abort"); - OutputJobInfo outputJobInfo = OutputJobInfo.create(databaseName, - tableName, null); - Job job = configureJob(testName, - conf, workingDir, MapWriteAbortTransaction.class, - outputJobInfo, inputPath); - assertFalse(job.waitForCompletion(true)); - - // verify that revision manager has it as aborted transaction - RevisionManager rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(conf); - try { - TableSnapshot snapshot = rm.createSnapshot(databaseName + "." + tableName); - for (String family : snapshot.getColumnFamilies()) { - assertEquals(1, snapshot.getRevision(family)); - List abortedWriteTransactions = rm.getAbortedWriteTransactions( - databaseName + "." + tableName, family); - assertEquals(1, abortedWriteTransactions.size()); - assertEquals(1, abortedWriteTransactions.get(0).getRevision()); - } - } finally { - rm.close(); - } - - //verify that hbase does not have any of the records. - //Since records are only written during commitJob, - //hbase should not have any records. 
- HTable table = new HTable(conf, databaseName + "." + tableName); - Scan scan = new Scan(); - scan.addFamily(Bytes.toBytes(familyName)); - ResultScanner scanner = table.getScanner(scan); - assertFalse(scanner.iterator().hasNext()); - - // verify that the storage handler input format returns empty results. - Path outputDir = new Path(getTestDir(), - "mapred/testHBaseTableBulkIgnoreAbortedTransactions"); - FileSystem fs = getFileSystem(); - if (fs.exists(outputDir)) { - fs.delete(outputDir, true); - } - job = new Job(conf, "hbase-bulk-aborted-transaction"); - job.setJarByClass(this.getClass()); - job.setMapperClass(MapReadAbortedTransaction.class); - job.setInputFormatClass(HCatInputFormat.class); - HCatInputFormat.setInput(job, databaseName, tableName); - job.setOutputFormatClass(TextOutputFormat.class); - TextOutputFormat.setOutputPath(job, outputDir); - job.setMapOutputKeyClass(BytesWritable.class); - job.setMapOutputValueClass(Text.class); - job.setOutputKeyClass(BytesWritable.class); - job.setOutputValueClass(Text.class); - job.setNumReduceTasks(0); - assertTrue(job.waitForCompletion(true)); - } - - private Job configureJob(String jobName, Configuration conf, - Path workingDir, Class mapperClass, - OutputJobInfo outputJobInfo, Path inputPath) throws IOException { - Job job = new Job(conf, jobName); - job.setWorkingDirectory(workingDir); - job.setJarByClass(this.getClass()); - job.setMapperClass(mapperClass); - - job.setInputFormatClass(TextInputFormat.class); - TextInputFormat.setInputPaths(job, inputPath); - job.setOutputFormatClass(HCatOutputFormat.class); - HCatOutputFormat.setOutput(job, outputJobInfo); - - job.setMapOutputKeyClass(BytesWritable.class); - job.setMapOutputValueClass(HCatRecord.class); - job.setOutputKeyClass(BytesWritable.class); - job.setOutputValueClass(HCatRecord.class); - - job.setNumReduceTasks(0); - return job; - } - -} - diff --git hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/TestHBaseDirectOutputFormat.java 
hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/TestHBaseDirectOutputFormat.java deleted file mode 100644 index 4dcef2f..0000000 --- hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/TestHBaseDirectOutputFormat.java +++ /dev/null @@ -1,501 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.hbase; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hbase.HBaseConfiguration; -import org.apache.hadoop.hbase.client.HTable; -import org.apache.hadoop.hbase.client.Put; -import org.apache.hadoop.hbase.client.Result; -import org.apache.hadoop.hbase.client.ResultScanner; -import org.apache.hadoop.hbase.client.Scan; -import org.apache.hadoop.hbase.io.ImmutableBytesWritable; -import org.apache.hadoop.hbase.mapred.TableOutputFormat; -import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hadoop.hive.cli.CliSessionState; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hadoop.io.BytesWritable; -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.mapred.JobClient; -import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapred.OutputCollector; -import org.apache.hadoop.mapred.Reporter; -import org.apache.hadoop.mapred.RunningJob; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.Mapper; -import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; -import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; -import org.apache.hcatalog.cli.HCatDriver; -import org.apache.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzer; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.common.HCatUtil; -import org.apache.hcatalog.data.DefaultHCatRecord; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.apache.hcatalog.hbase.snapshot.FamilyRevision; -import org.apache.hcatalog.hbase.snapshot.RevisionManager; -import org.apache.hcatalog.hbase.snapshot.RevisionManagerConfiguration; -import 
org.apache.hcatalog.hbase.snapshot.TableSnapshot; -import org.apache.hcatalog.hbase.snapshot.Transaction; -import org.apache.hcatalog.mapreduce.HCatInputFormat; -import org.apache.hcatalog.mapreduce.HCatOutputFormat; -import org.apache.hcatalog.mapreduce.OutputJobInfo; -import org.junit.Test; - -import java.io.IOException; -import java.util.Arrays; -import java.util.List; -import java.util.Map; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNotSame; -import static org.junit.Assert.assertTrue; - -/** - * Test HBaseDirectOUtputFormat and HBaseHCatStorageHandler using a MiniCluster - */ -public class TestHBaseDirectOutputFormat extends SkeletonHBaseTest { - - private final HiveConf allConf; - private final HCatDriver hcatDriver; - - public TestHBaseDirectOutputFormat() { - allConf = getHiveConf(); - allConf.set(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname, - HCatSemanticAnalyzer.class.getName()); - allConf.set(HiveConf.ConfVars.HADOOPFS.varname, getFileSystem().getUri().toString()); - allConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, new Path(getTestDir(), "warehouse").toString()); - - //Add hbase properties - for (Map.Entry el : getHbaseConf()) - allConf.set(el.getKey(), el.getValue()); - for (Map.Entry el : getJobConf()) - allConf.set(el.getKey(), el.getValue()); - HBaseConfiguration.merge( - allConf, - RevisionManagerConfiguration.create()); - SessionState.start(new CliSessionState(allConf)); - hcatDriver = new HCatDriver(); - } - - @Test - public void directOutputFormatTest() throws IOException, ClassNotFoundException, InterruptedException { - String testName = "directOutputFormatTest"; - Path methodTestDir = new Path(getTestDir(), testName); - - String tableName = newTableName(testName).toLowerCase(); - String familyName = "my_family"; - byte[] familyNameBytes = Bytes.toBytes(familyName); - - //include hbase config in conf file - Configuration conf = new 
Configuration(allConf); - conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, HCatUtil.serialize(allConf.getAllProperties())); - - //create table - createTable(tableName, new String[]{familyName}); - - String data[] = {"1,english:ONE,spanish:UNO", - "2,english:ONE,spanish:DOS", - "3,english:ONE,spanish:TRES"}; - - - // input/output settings - Path inputPath = new Path(methodTestDir, "mr_input"); - getFileSystem().mkdirs(inputPath); - FSDataOutputStream os = getFileSystem().create(new Path(inputPath, "inputFile.txt")); - for (String line : data) - os.write(Bytes.toBytes(line + "\n")); - os.close(); - - //create job - JobConf job = new JobConf(conf); - job.setJobName(testName); - job.setWorkingDirectory(new Path(methodTestDir, "mr_work")); - job.setJarByClass(this.getClass()); - job.setMapperClass(MapWrite.class); - - job.setInputFormat(org.apache.hadoop.mapred.TextInputFormat.class); - org.apache.hadoop.mapred.TextInputFormat.setInputPaths(job, inputPath); - - job.setOutputFormat(HBaseDirectOutputFormat.class); - job.set(TableOutputFormat.OUTPUT_TABLE, tableName); - job.set(HBaseConstants.PROPERTY_OUTPUT_TABLE_NAME_KEY, tableName); - - //manually create transaction - RevisionManager rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(conf); - try { - OutputJobInfo outputJobInfo = OutputJobInfo.create("default", tableName, null); - Transaction txn = rm.beginWriteTransaction(tableName, Arrays.asList(familyName)); - outputJobInfo.getProperties().setProperty(HBaseConstants.PROPERTY_WRITE_TXN_KEY, - HCatUtil.serialize(txn)); - job.set(HCatConstants.HCAT_KEY_OUTPUT_INFO, - HCatUtil.serialize(outputJobInfo)); - } finally { - rm.close(); - } - - job.setMapOutputKeyClass(BytesWritable.class); - job.setMapOutputValueClass(HCatRecord.class); - job.setOutputKeyClass(BytesWritable.class); - job.setOutputValueClass(HCatRecord.class); - job.setNumReduceTasks(0); - - RunningJob runJob = JobClient.runJob(job); - runJob.waitForCompletion(); - assertTrue(runJob.isSuccessful()); - - //verify 
- HTable table = new HTable(conf, tableName); - Scan scan = new Scan(); - scan.addFamily(familyNameBytes); - ResultScanner scanner = table.getScanner(scan); - int index = 0; - for (Result result : scanner) { - String vals[] = data[index].toString().split(","); - for (int i = 1; i < vals.length; i++) { - String pair[] = vals[i].split(":"); - assertTrue(result.containsColumn(familyNameBytes, Bytes.toBytes(pair[0]))); - assertEquals(pair[1], Bytes.toString(result.getValue(familyNameBytes, Bytes.toBytes(pair[0])))); - } - index++; - } - assertEquals(data.length, index); - } - - @Test - public void directHCatOutputFormatTest() throws Exception { - String testName = "directHCatOutputFormatTest"; - Path methodTestDir = new Path(getTestDir(), testName); - - String databaseName = testName; - String dbDir = new Path(methodTestDir, "DB_" + testName).toString(); - String tableName = newTableName(testName); - String familyName = "my_family"; - byte[] familyNameBytes = Bytes.toBytes(familyName); - //Table name will be lower case unless specified by hbase.table.name property - String hbaseTableName = (databaseName + "." + tableName).toLowerCase(); - - //include hbase config in conf file - Configuration conf = new Configuration(allConf); - conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, HCatUtil.serialize(allConf.getAllProperties())); - - - String dbquery = "CREATE DATABASE IF NOT EXISTS " + databaseName + " LOCATION '" + dbDir + "'"; - String tableQuery = "CREATE TABLE " + databaseName + "." 
+ tableName + - "(key int, english string, spanish string) STORED BY " + - "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" + - "TBLPROPERTIES (" + - "'hbase.columns.mapping'=':key," + familyName + ":english," + familyName + ":spanish')"; - - assertEquals(0, hcatDriver.run(dbquery).getResponseCode()); - assertEquals(0, hcatDriver.run(tableQuery).getResponseCode()); - - String data[] = {"1,english:ONE,spanish:UNO", - "2,english:ONE,spanish:DOS", - "3,english:ONE,spanish:TRES"}; - - // input/output settings - Path inputPath = new Path(methodTestDir, "mr_input"); - getFileSystem().mkdirs(inputPath); - //create multiple files so we can test with multiple mappers - for (int i = 0; i < data.length; i++) { - FSDataOutputStream os = getFileSystem().create(new Path(inputPath, "inputFile" + i + ".txt")); - os.write(Bytes.toBytes(data[i] + "\n")); - os.close(); - } - - //create job - Path workingDir = new Path(methodTestDir, "mr_work"); - OutputJobInfo outputJobInfo = OutputJobInfo.create(databaseName, - tableName, null); - Job job = configureJob(testName, conf, workingDir, MapHCatWrite.class, - outputJobInfo, inputPath); - assertTrue(job.waitForCompletion(true)); - - RevisionManager rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(conf); - try { - TableSnapshot snapshot = rm.createSnapshot(hbaseTableName); - for (String el : snapshot.getColumnFamilies()) { - assertEquals(1, snapshot.getRevision(el)); - } - } finally { - rm.close(); - } - - //verify - HTable table = new HTable(conf, hbaseTableName); - Scan scan = new Scan(); - scan.addFamily(familyNameBytes); - ResultScanner scanner = table.getScanner(scan); - int index = 0; - for (Result result : scanner) { - String vals[] = data[index].toString().split(","); - for (int i = 1; i < vals.length; i++) { - String pair[] = vals[i].split(":"); - assertTrue(result.containsColumn(familyNameBytes, Bytes.toBytes(pair[0]))); - assertEquals(pair[1], Bytes.toString(result.getValue(familyNameBytes, Bytes.toBytes(pair[0])))); - 
assertEquals(1l, result.getColumn(familyNameBytes, Bytes.toBytes(pair[0])).get(0).getTimestamp()); - } - index++; - } - assertEquals(data.length, index); - } - - @Test - public void directModeAbortTest() throws Exception { - String testName = "directModeAbortTest"; - Path methodTestDir = new Path(getTestDir(), testName); - String databaseName = testName; - String dbDir = new Path(methodTestDir, "DB_" + testName).toString(); - String tableName = newTableName(testName); - String familyName = "my_family"; - byte[] familyNameBytes = Bytes.toBytes(familyName); - //Table name as specified by hbase.table.name property - String hbaseTableName = tableName; - - // include hbase config in conf file - Configuration conf = new Configuration(allConf); - conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, HCatUtil.serialize(allConf.getAllProperties())); - - String dbquery = "CREATE DATABASE IF NOT EXISTS " + databaseName + " LOCATION '" + dbDir - + "'"; - String tableQuery = "CREATE TABLE " + databaseName + "." 
+ tableName + - "(key int, english string, spanish string) STORED BY " + - "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" + - "TBLPROPERTIES (" + - "'hbase.columns.mapping'=':key," + familyName + ":english," + familyName + - ":spanish','hbase.table.name'='" + hbaseTableName + "')"; - - assertEquals(0, hcatDriver.run(dbquery).getResponseCode()); - assertEquals(0, hcatDriver.run(tableQuery).getResponseCode()); - - String data[] = {"1,english:ONE,spanish:UNO", - "2,english:TWO,spanish:DOS", - "3,english:THREE,spanish:TRES"}; - - Path inputPath = new Path(methodTestDir, "mr_input"); - getFileSystem().mkdirs(inputPath); - // create multiple files so we can test with multiple mappers - for (int i = 0; i < data.length; i++) { - FSDataOutputStream os = getFileSystem().create( - new Path(inputPath, "inputFile" + i + ".txt")); - os.write(Bytes.toBytes(data[i] + "\n")); - os.close(); - } - - Path workingDir = new Path(methodTestDir, "mr_abort"); - OutputJobInfo outputJobInfo = OutputJobInfo.create(databaseName, - tableName, null); - Job job = configureJob(testName, conf, workingDir, MapWriteAbortTransaction.class, - outputJobInfo, inputPath); - assertFalse(job.waitForCompletion(true)); - - // verify that revision manager has it as aborted transaction - RevisionManager rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(conf); - try { - TableSnapshot snapshot = rm.createSnapshot(hbaseTableName); - for (String family : snapshot.getColumnFamilies()) { - assertEquals(1, snapshot.getRevision(family)); - List abortedWriteTransactions = rm.getAbortedWriteTransactions( - hbaseTableName, family); - assertEquals(1, abortedWriteTransactions.size()); - assertEquals(1, abortedWriteTransactions.get(0).getRevision()); - } - } finally { - rm.close(); - } - - // verify that hbase has the records of the successful maps. 
- HTable table = new HTable(conf, hbaseTableName); - Scan scan = new Scan(); - scan.addFamily(familyNameBytes); - ResultScanner scanner = table.getScanner(scan); - int count = 0; - for (Result result : scanner) { - String key = Bytes.toString(result.getRow()); - assertNotSame(MapWriteAbortTransaction.failedKey, key); - int index = Integer.parseInt(key) - 1; - String vals[] = data[index].toString().split(","); - for (int i = 1; i < vals.length; i++) { - String pair[] = vals[i].split(":"); - assertTrue(result.containsColumn(familyNameBytes, Bytes.toBytes(pair[0]))); - assertEquals(pair[1], - Bytes.toString(result.getValue(familyNameBytes, Bytes.toBytes(pair[0])))); - assertEquals(1l, result.getColumn(familyNameBytes, Bytes.toBytes(pair[0])).get(0) - .getTimestamp()); - } - count++; - } - assertEquals(data.length - 1, count); - - // verify that the inputformat returns empty results. - Path outputDir = new Path(getTestDir(), - "mapred/testHBaseTableIgnoreAbortedTransactions"); - FileSystem fs = getFileSystem(); - if (fs.exists(outputDir)) { - fs.delete(outputDir, true); - } - job = new Job(conf, "hbase-aborted-transaction"); - job.setJarByClass(this.getClass()); - job.setMapperClass(MapReadAbortedTransaction.class); - job.setInputFormatClass(HCatInputFormat.class); - HCatInputFormat.setInput(job, databaseName, tableName); - job.setOutputFormatClass(TextOutputFormat.class); - TextOutputFormat.setOutputPath(job, outputDir); - job.setMapOutputKeyClass(BytesWritable.class); - job.setMapOutputValueClass(Text.class); - job.setOutputKeyClass(BytesWritable.class); - job.setOutputValueClass(Text.class); - job.setNumReduceTasks(0); - assertTrue(job.waitForCompletion(true)); - } - - private Job configureJob(String jobName, Configuration conf, - Path workingDir, Class mapperClass, - OutputJobInfo outputJobInfo, Path inputPath) throws IOException { - Job job = new Job(conf, jobName); - job.setWorkingDirectory(workingDir); - job.setJarByClass(this.getClass()); - 
job.setMapperClass(mapperClass); - - job.setInputFormatClass(TextInputFormat.class); - TextInputFormat.setInputPaths(job, inputPath); - job.setOutputFormatClass(HCatOutputFormat.class); - HCatOutputFormat.setOutput(job, outputJobInfo); - String txnString = job.getConfiguration().get(HBaseConstants.PROPERTY_WRITE_TXN_KEY); - //Test passing in same OutputJobInfo multiple times and verify 1 transaction is created - String jobString = job.getConfiguration().get(HCatConstants.HCAT_KEY_OUTPUT_INFO); - outputJobInfo = (OutputJobInfo) HCatUtil.deserialize(jobString); - Job job2 = new Job(conf); - HCatOutputFormat.setOutput(job2, outputJobInfo); - assertEquals(txnString, job2.getConfiguration().get(HBaseConstants.PROPERTY_WRITE_TXN_KEY)); - job.setMapOutputKeyClass(BytesWritable.class); - job.setMapOutputValueClass(HCatRecord.class); - job.setOutputKeyClass(BytesWritable.class); - job.setOutputValueClass(HCatRecord.class); - - job.setNumReduceTasks(0); - return job; - } - - public static class MapHCatWrite extends Mapper { - - @Override - public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { - OutputJobInfo jobInfo = (OutputJobInfo) HCatUtil.deserialize(context.getConfiguration().get(HCatConstants.HCAT_KEY_OUTPUT_INFO)); - HCatRecord record = new DefaultHCatRecord(3); - HCatSchema schema = jobInfo.getOutputSchema(); - String vals[] = value.toString().split(","); - record.setInteger("key", schema, Integer.parseInt(vals[0])); - for (int i = 1; i < vals.length; i++) { - String pair[] = vals[i].split(":"); - record.set(pair[0], schema, pair[1]); - } - context.write(null, record); - } - } - - public static class MapWrite implements org.apache.hadoop.mapred.Mapper { - - @Override - public void configure(JobConf job) { - } - - @Override - public void close() throws IOException { - } - - @Override - public void map(LongWritable key, Text value, - OutputCollector output, Reporter reporter) - throws IOException { - String vals[] = 
value.toString().split(","); - Put put = new Put(Bytes.toBytes(vals[0])); - for (int i = 1; i < vals.length; i++) { - String pair[] = vals[i].split(":"); - put.add(Bytes.toBytes("my_family"), - Bytes.toBytes(pair[0]), - Bytes.toBytes(pair[1])); - } - output.collect(null, put); - } - } - - static class MapWriteAbortTransaction extends Mapper { - public static String failedKey; - private static int count = 0; - - @Override - public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { - OutputJobInfo jobInfo = (OutputJobInfo) HCatUtil.deserialize(context.getConfiguration().get(HCatConstants.HCAT_KEY_OUTPUT_INFO)); - HCatRecord record = new DefaultHCatRecord(3); - HCatSchema schema = jobInfo.getOutputSchema(); - String vals[] = value.toString().split(","); - record.setInteger("key", schema, Integer.parseInt(vals[0])); - synchronized (MapWriteAbortTransaction.class) { - if (count == 2) { - failedKey = vals[0]; - throw new IOException("Failing map to test abort"); - } - for (int i = 1; i < vals.length; i++) { - String pair[] = vals[i].split(":"); - record.set(pair[0], schema, pair[1]); - } - context.write(null, record); - count++; - } - - } - - } - - static class MapReadAbortedTransaction - extends - Mapper, Text> { - - @Override - public void run(Context context) throws IOException, - InterruptedException { - setup(context); - if (context.nextKeyValue()) { - map(context.getCurrentKey(), context.getCurrentValue(), context); - while (context.nextKeyValue()) { - map(context.getCurrentKey(), context.getCurrentValue(), - context); - } - throw new IOException("There should have been no records"); - } - cleanup(context); - } - - @Override - public void map(ImmutableBytesWritable key, HCatRecord value, - Context context) throws IOException, InterruptedException { - System.out.println("HCat record value" + value.toString()); - } - } -} diff --git 
hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/TestHBaseHCatStorageHandler.java hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/TestHBaseHCatStorageHandler.java deleted file mode 100644 index a97ab4e..0000000 --- hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/TestHBaseHCatStorageHandler.java +++ /dev/null @@ -1,241 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.hcatalog.hbase; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; - -import java.net.URI; -import java.util.Map; - -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hbase.HBaseConfiguration; -import org.apache.hadoop.hbase.HColumnDescriptor; -import org.apache.hadoop.hbase.HTableDescriptor; -import org.apache.hadoop.hbase.client.HBaseAdmin; -import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hadoop.hive.cli.CliSessionState; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.apache.hadoop.hive.metastore.Warehouse; -import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; -import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hcatalog.cli.HCatDriver; -import org.apache.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzer; -import org.apache.hcatalog.hbase.snapshot.RevisionManager; -import org.apache.hcatalog.hbase.snapshot.RevisionManagerConfiguration; -import org.apache.zookeeper.KeeperException.NoNodeException; -import org.junit.Test; - -public class TestHBaseHCatStorageHandler extends SkeletonHBaseTest { - - private static HiveConf hcatConf; - private static HCatDriver hcatDriver; - private static Warehouse wh; - - public void Initialize() throws Exception { - - hcatConf = getHiveConf(); - hcatConf.set(ConfVars.SEMANTIC_ANALYZER_HOOK.varname, - HCatSemanticAnalyzer.class.getName()); - URI fsuri = getFileSystem().getUri(); - Path whPath = new Path(fsuri.getScheme(), fsuri.getAuthority(), - getTestDir()); - hcatConf.set(HiveConf.ConfVars.HADOOPFS.varname, fsuri.toString()); - hcatConf.set(ConfVars.METASTOREWAREHOUSE.varname, whPath.toString()); - - //Add hbase properties - for (Map.Entry el : getHbaseConf()) { - if (el.getKey().startsWith("hbase.")) { - hcatConf.set(el.getKey(), el.getValue()); - } - } - HBaseConfiguration.merge( - hcatConf, - RevisionManagerConfiguration.create()); - - 
SessionState.start(new CliSessionState(hcatConf)); - hcatDriver = new HCatDriver(); - - } - - @Test - public void testTableCreateDrop() throws Exception { - Initialize(); - - hcatDriver.run("drop table test_table"); - CommandProcessorResponse response = hcatDriver - .run("create table test_table(key int, value string) STORED BY " + - "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" - + "TBLPROPERTIES ('hbase.columns.mapping'=':key,cf1:val')"); - - assertEquals(0, response.getResponseCode()); - - HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf()); - boolean doesTableExist = hAdmin.tableExists("test_table"); - - assertTrue(doesTableExist); - - RevisionManager rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(hcatConf); - rm.open(); - //Should be able to successfully query revision manager - rm.getAbortedWriteTransactions("test_table", "cf1"); - - hcatDriver.run("drop table test_table"); - doesTableExist = hAdmin.tableExists("test_table"); - assertTrue(doesTableExist == false); - - try { - rm.getAbortedWriteTransactions("test_table", "cf1"); - } catch (Exception e) { - assertTrue(e.getCause() instanceof NoNodeException); - } - rm.close(); - - } - - @Test - public void testTableCreateDropDifferentCase() throws Exception { - Initialize(); - - hcatDriver.run("drop table test_Table"); - CommandProcessorResponse response = hcatDriver - .run("create table test_Table(key int, value string) STORED BY " + - "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" - + "TBLPROPERTIES ('hbase.columns.mapping'=':key,cf1:val')"); - - assertEquals(0, response.getResponseCode()); - - //HBase table gets created with lower case unless specified as a table property. 
- HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf()); - boolean doesTableExist = hAdmin.tableExists("test_table"); - - assertTrue(doesTableExist); - - RevisionManager rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(hcatConf); - rm.open(); - //Should be able to successfully query revision manager - rm.getAbortedWriteTransactions("test_table", "cf1"); - - hcatDriver.run("drop table test_table"); - doesTableExist = hAdmin.tableExists("test_table"); - assertTrue(doesTableExist == false); - - try { - rm.getAbortedWriteTransactions("test_table", "cf1"); - } catch (Exception e) { - assertTrue(e.getCause() instanceof NoNodeException); - } - rm.close(); - - } - - @Test - public void testTableCreateDropCaseSensitive() throws Exception { - Initialize(); - - hcatDriver.run("drop table test_Table"); - CommandProcessorResponse response = hcatDriver - .run("create table test_Table(key int, value string) STORED BY " + - "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" - + "TBLPROPERTIES ('hbase.columns.mapping'=':key,cf1:val'," + - " 'hbase.table.name'='CaseSensitiveTable')"); - - assertEquals(0, response.getResponseCode()); - - HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf()); - boolean doesTableExist = hAdmin.tableExists("CaseSensitiveTable"); - - assertTrue(doesTableExist); - - RevisionManager rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(hcatConf); - rm.open(); - //Should be able to successfully query revision manager - rm.getAbortedWriteTransactions("CaseSensitiveTable", "cf1"); - - hcatDriver.run("drop table test_table"); - doesTableExist = hAdmin.tableExists("CaseSensitiveTable"); - assertTrue(doesTableExist == false); - - try { - rm.getAbortedWriteTransactions("CaseSensitiveTable", "cf1"); - } catch (Exception e) { - assertTrue(e.getCause() instanceof NoNodeException); - } - rm.close(); - - } - - @Test - public void testTableDropNonExistent() throws Exception { - Initialize(); - - hcatDriver.run("drop table mytable"); - CommandProcessorResponse 
response = hcatDriver - .run("create table mytable(key int, value string) STORED BY " + - "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" - + "TBLPROPERTIES ('hbase.columns.mapping'=':key,cf1:val')"); - - assertEquals(0, response.getResponseCode()); - - HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf()); - boolean doesTableExist = hAdmin.tableExists("mytable"); - assertTrue(doesTableExist); - - //Now delete the table from hbase - if (hAdmin.isTableEnabled("mytable")) { - hAdmin.disableTable("mytable"); - } - hAdmin.deleteTable("mytable"); - doesTableExist = hAdmin.tableExists("mytable"); - assertTrue(doesTableExist == false); - - CommandProcessorResponse responseTwo = hcatDriver.run("drop table mytable"); - assertTrue(responseTwo.getResponseCode() == 0); - - } - - @Test - public void testTableCreateExternal() throws Exception { - - String tableName = "testTable"; - HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf()); - - HTableDescriptor tableDesc = new HTableDescriptor(tableName); - tableDesc.addFamily(new HColumnDescriptor(Bytes.toBytes("key"))); - tableDesc.addFamily(new HColumnDescriptor(Bytes.toBytes("familyone"))); - tableDesc.addFamily(new HColumnDescriptor(Bytes.toBytes("familytwo"))); - - hAdmin.createTable(tableDesc); - boolean doesTableExist = hAdmin.tableExists(tableName); - assertTrue(doesTableExist); - - hcatDriver.run("drop table mytabletwo"); - CommandProcessorResponse response = hcatDriver - .run("create external table mytabletwo(key int, valueone string, valuetwo string) STORED BY " + - "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" - + "TBLPROPERTIES ('hbase.columns.mapping'=':key,familyone:val,familytwo:val'," + - "'hbase.table.name'='testTable')"); - - assertEquals(0, response.getResponseCode()); - - } - - -} diff --git hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/TestHBaseInputFormat.java hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/TestHBaseInputFormat.java deleted file mode 100644 index 
9e02d83..0000000 --- hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/TestHBaseInputFormat.java +++ /dev/null @@ -1,609 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.hbase; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; - -import java.io.IOException; -import java.net.URI; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hbase.HBaseConfiguration; -import org.apache.hadoop.hbase.KeyValue; -import org.apache.hadoop.hbase.client.HBaseAdmin; -import org.apache.hadoop.hbase.client.HTable; -import org.apache.hadoop.hbase.client.Put; -import org.apache.hadoop.hbase.client.Result; -import org.apache.hadoop.hbase.io.ImmutableBytesWritable; -import org.apache.hadoop.hbase.mapreduce.TableInputFormat; -import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hadoop.hive.cli.CliSessionState; -import org.apache.hadoop.hive.conf.HiveConf; -import 
org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.apache.hadoop.hive.metastore.MetaStoreUtils; -import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; -import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hadoop.io.BytesWritable; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.mapred.JobClient; -import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapred.OutputCollector; -import org.apache.hadoop.mapred.Reporter; -import org.apache.hadoop.mapred.RunningJob; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.Mapper; -import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; -import org.apache.hcatalog.cli.HCatDriver; -import org.apache.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzer; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.common.HCatException; -import org.apache.hcatalog.common.HCatUtil; -import org.apache.hcatalog.data.HCatRecord; -import org.apache.hcatalog.data.schema.HCatFieldSchema; -import org.apache.hcatalog.data.schema.HCatSchema; -import org.apache.hcatalog.hbase.snapshot.RevisionManager; -import org.apache.hcatalog.hbase.snapshot.RevisionManagerConfiguration; -import org.apache.hcatalog.hbase.snapshot.Transaction; -import org.apache.hcatalog.mapreduce.HCatInputFormat; -import org.apache.hcatalog.mapreduce.InputJobInfo; -import org.apache.hcatalog.mapreduce.PartInfo; -import org.junit.Test; - -public class TestHBaseInputFormat extends SkeletonHBaseTest { - - private static HiveConf hcatConf; - private static HCatDriver hcatDriver; - private final byte[] FAMILY = Bytes.toBytes("testFamily"); - private final byte[] QUALIFIER1 = Bytes.toBytes("testQualifier1"); - private final byte[] QUALIFIER2 = Bytes.toBytes("testQualifier2"); - - public TestHBaseInputFormat() throws Exception { - hcatConf = getHiveConf(); - hcatConf.set(ConfVars.SEMANTIC_ANALYZER_HOOK.varname, - 
HCatSemanticAnalyzer.class.getName()); - URI fsuri = getFileSystem().getUri(); - Path whPath = new Path(fsuri.getScheme(), fsuri.getAuthority(), - getTestDir()); - hcatConf.set(HiveConf.ConfVars.HADOOPFS.varname, fsuri.toString()); - hcatConf.set(ConfVars.METASTOREWAREHOUSE.varname, whPath.toString()); - - //Add hbase properties - - for (Map.Entry el : getHbaseConf()) { - if (el.getKey().startsWith("hbase.")) { - hcatConf.set(el.getKey(), el.getValue()); - } - } - HBaseConfiguration.merge(hcatConf, - RevisionManagerConfiguration.create()); - - - SessionState.start(new CliSessionState(hcatConf)); - hcatDriver = new HCatDriver(); - - } - - private List generatePuts(int num, String tableName) throws IOException { - - List columnFamilies = Arrays.asList("testFamily"); - RevisionManager rm = null; - List myPuts; - try { - rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(hcatConf); - rm.open(); - myPuts = new ArrayList(); - for (int i = 1; i <= num; i++) { - Put put = new Put(Bytes.toBytes("testRow")); - put.add(FAMILY, QUALIFIER1, i, Bytes.toBytes("textValue-" + i)); - put.add(FAMILY, QUALIFIER2, i, Bytes.toBytes("textValue-" + i)); - myPuts.add(put); - Transaction tsx = rm.beginWriteTransaction(tableName, - columnFamilies); - rm.commitWriteTransaction(tsx); - } - } finally { - if (rm != null) - rm.close(); - } - - return myPuts; - } - - private void populateHBaseTable(String tName, int revisions) throws IOException { - List myPuts = generatePuts(revisions, tName); - HTable table = new HTable(getHbaseConf(), Bytes.toBytes(tName)); - table.put(myPuts); - } - - private long populateHBaseTableQualifier1(String tName, int value, Boolean commit) - throws IOException { - List columnFamilies = Arrays.asList("testFamily"); - RevisionManager rm = null; - List myPuts = new ArrayList(); - long revision; - try { - rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(hcatConf); - rm.open(); - Transaction tsx = rm.beginWriteTransaction(tName, columnFamilies); - - Put put = 
new Put(Bytes.toBytes("testRow")); - revision = tsx.getRevisionNumber(); - put.add(FAMILY, QUALIFIER1, revision, - Bytes.toBytes("textValue-" + value)); - myPuts.add(put); - - // If commit is null it is left as a running transaction - if (commit != null) { - if (commit) { - rm.commitWriteTransaction(tsx); - } else { - rm.abortWriteTransaction(tsx); - } - } - } finally { - if (rm != null) - rm.close(); - } - HTable table = new HTable(getHbaseConf(), Bytes.toBytes(tName)); - table.put(myPuts); - return revision; - } - - @Test - public void TestHBaseTableReadMR() throws Exception { - String tableName = newTableName("MyTable"); - String databaseName = newTableName("MyDatabase"); - //Table name will be lower case unless specified by hbase.table.name property - String hbaseTableName = (databaseName + "." + tableName).toLowerCase(); - String db_dir = new Path(getTestDir(), "hbasedb").toString(); - - String dbquery = "CREATE DATABASE IF NOT EXISTS " + databaseName + " LOCATION '" - + db_dir + "'"; - String tableQuery = "CREATE TABLE " + databaseName + "." 
+ tableName - + "(key string, testqualifier1 string, testqualifier2 string) STORED BY " + - "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" - + "TBLPROPERTIES ('hbase.columns.mapping'=':key,testFamily:testQualifier1,testFamily:testQualifier2')"; - - CommandProcessorResponse responseOne = hcatDriver.run(dbquery); - assertEquals(0, responseOne.getResponseCode()); - CommandProcessorResponse responseTwo = hcatDriver.run(tableQuery); - assertEquals(0, responseTwo.getResponseCode()); - - HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf()); - boolean doesTableExist = hAdmin.tableExists(hbaseTableName); - assertTrue(doesTableExist); - - populateHBaseTable(hbaseTableName, 5); - Configuration conf = new Configuration(hcatConf); - conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, - HCatUtil.serialize(getHiveConf().getAllProperties())); - - // output settings - Path outputDir = new Path(getTestDir(), "mapred/testHbaseTableMRRead"); - FileSystem fs = getFileSystem(); - if (fs.exists(outputDir)) { - fs.delete(outputDir, true); - } - // create job - Job job = new Job(conf, "hbase-mr-read-test"); - job.setJarByClass(this.getClass()); - job.setMapperClass(MapReadHTable.class); - MapReadHTable.resetCounters(); - - job.setInputFormatClass(HCatInputFormat.class); - HCatInputFormat.setInput(job.getConfiguration(), databaseName, tableName); - job.setOutputFormatClass(TextOutputFormat.class); - TextOutputFormat.setOutputPath(job, outputDir); - job.setMapOutputKeyClass(BytesWritable.class); - job.setMapOutputValueClass(Text.class); - job.setOutputKeyClass(BytesWritable.class); - job.setOutputValueClass(Text.class); - job.setNumReduceTasks(0); - assertTrue(job.waitForCompletion(true)); - // Note: These asserts only works in case of LocalJobRunner as they run in same jvm. - // If using MiniMRCluster, the tests will have to be modified. 
- assertFalse(MapReadHTable.error); - assertEquals(MapReadHTable.count, 1); - - String dropTableQuery = "DROP TABLE " + hbaseTableName; - CommandProcessorResponse responseThree = hcatDriver.run(dropTableQuery); - assertEquals(0, responseThree.getResponseCode()); - - boolean isHbaseTableThere = hAdmin.tableExists(hbaseTableName); - assertFalse(isHbaseTableThere); - - String dropDB = "DROP DATABASE " + databaseName; - CommandProcessorResponse responseFour = hcatDriver.run(dropDB); - assertEquals(0, responseFour.getResponseCode()); - } - - @Test - public void TestHBaseTableProjectionReadMR() throws Exception { - - String tableName = newTableName("MyTable"); - //Table name as specified by hbase.table.name property - String hbaseTableName = "MyDB_" + tableName; - String tableQuery = "CREATE TABLE " + tableName - + "(key string, testqualifier1 string, testqualifier2 string) STORED BY " - + "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" - + "TBLPROPERTIES ('hbase.columns.mapping'=" - + "':key,testFamily:testQualifier1,testFamily:testQualifier2'," - + "'hbase.table.name'='" + hbaseTableName + "')"; - - CommandProcessorResponse responseTwo = hcatDriver.run(tableQuery); - assertEquals(0, responseTwo.getResponseCode()); - - HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf()); - boolean doesTableExist = hAdmin.tableExists(hbaseTableName); - assertTrue(doesTableExist); - - populateHBaseTable(hbaseTableName, 5); - - Configuration conf = new Configuration(hcatConf); - conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, - HCatUtil.serialize(getHiveConf().getAllProperties())); - - // output settings - Path outputDir = new Path(getTestDir(), "mapred/testHBaseTableProjectionReadMR"); - FileSystem fs = getFileSystem(); - if (fs.exists(outputDir)) { - fs.delete(outputDir, true); - } - // create job - Job job = new Job(conf, "hbase-column-projection"); - job.setJarByClass(this.getClass()); - job.setMapperClass(MapReadProjHTable.class); - job.setInputFormatClass(HCatInputFormat.class); - 
HCatInputFormat.setOutputSchema(job, getProjectionSchema()); - HCatInputFormat.setInput(job, MetaStoreUtils.DEFAULT_DATABASE_NAME, tableName); - job.setOutputFormatClass(TextOutputFormat.class); - TextOutputFormat.setOutputPath(job, outputDir); - job.setMapOutputKeyClass(BytesWritable.class); - job.setMapOutputValueClass(Text.class); - job.setOutputKeyClass(BytesWritable.class); - job.setOutputValueClass(Text.class); - job.setNumReduceTasks(0); - assertTrue(job.waitForCompletion(true)); - assertFalse(MapReadProjHTable.error); - assertEquals(MapReadProjHTable.count, 1); - - String dropTableQuery = "DROP TABLE " + tableName; - CommandProcessorResponse responseThree = hcatDriver.run(dropTableQuery); - assertEquals(0, responseThree.getResponseCode()); - - boolean isHbaseTableThere = hAdmin.tableExists(hbaseTableName); - assertFalse(isHbaseTableThere); - } - - @Test - public void TestHBaseInputFormatProjectionReadMR() throws Exception { - - String tableName = newTableName("mytable"); - String tableQuery = "CREATE TABLE " + tableName - + "(key string, testqualifier1 string, testqualifier2 string) STORED BY " + - "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" - + "TBLPROPERTIES ('hbase.columns.mapping'=':key," + - "testFamily:testQualifier1,testFamily:testQualifier2')"; - - CommandProcessorResponse responseTwo = hcatDriver.run(tableQuery); - assertEquals(0, responseTwo.getResponseCode()); - - HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf()); - boolean doesTableExist = hAdmin.tableExists(tableName); - assertTrue(doesTableExist); - - populateHBaseTable(tableName, 5); - - Configuration conf = new Configuration(hcatConf); - conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, - HCatUtil.serialize(getHiveConf().getAllProperties())); - - // output settings - Path outputDir = new Path(getTestDir(), "mapred/testHBaseTableProjectionReadMR"); - FileSystem fs = getFileSystem(); - if (fs.exists(outputDir)) { - fs.delete(outputDir, true); - } - // create job - JobConf job = new 
JobConf(conf); - job.setJobName("hbase-scan-column"); - job.setJarByClass(this.getClass()); - job.setMapperClass(MapReadProjectionHTable.class); - job.setInputFormat(HBaseInputFormat.class); - - //Configure projection schema - job.set(HCatConstants.HCAT_KEY_OUTPUT_SCHEMA, HCatUtil.serialize(getProjectionSchema())); - Job newJob = new Job(job); - HCatInputFormat.setInput(newJob, MetaStoreUtils.DEFAULT_DATABASE_NAME, tableName); - String inputJobString = newJob.getConfiguration().get(HCatConstants.HCAT_KEY_JOB_INFO); - InputJobInfo info = (InputJobInfo) HCatUtil.deserialize(inputJobString); - job.set(HCatConstants.HCAT_KEY_JOB_INFO, inputJobString); - for (PartInfo partinfo : info.getPartitions()) { - for (Entry entry : partinfo.getJobProperties().entrySet()) - job.set(entry.getKey(), entry.getValue()); - } - assertEquals("testFamily:testQualifier1", job.get(TableInputFormat.SCAN_COLUMNS)); - - job.setOutputFormat(org.apache.hadoop.mapred.TextOutputFormat.class); - org.apache.hadoop.mapred.TextOutputFormat.setOutputPath(job, outputDir); - job.setMapOutputKeyClass(BytesWritable.class); - job.setMapOutputValueClass(Text.class); - job.setOutputKeyClass(BytesWritable.class); - job.setOutputValueClass(Text.class); - job.setNumReduceTasks(0); - - RunningJob runJob = JobClient.runJob(job); - runJob.waitForCompletion(); - assertTrue(runJob.isSuccessful()); - assertFalse(MapReadProjHTable.error); - assertEquals(MapReadProjHTable.count, 1); - - String dropTableQuery = "DROP TABLE " + tableName; - CommandProcessorResponse responseThree = hcatDriver.run(dropTableQuery); - assertEquals(0, responseThree.getResponseCode()); - - boolean isHbaseTableThere = hAdmin.tableExists(tableName); - assertFalse(isHbaseTableThere); - } - - @Test - public void TestHBaseTableIgnoreAbortedTransactions() throws Exception { - String tableName = newTableName("mytable"); - String tableQuery = "CREATE TABLE " + tableName - + "(key string, testqualifier1 string, testqualifier2 string) STORED BY " + - 
"'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" - + "TBLPROPERTIES ('hbase.columns.mapping'=':key," + - "testFamily:testQualifier1,testFamily:testQualifier2')"; - - CommandProcessorResponse responseTwo = hcatDriver.run(tableQuery); - assertEquals(0, responseTwo.getResponseCode()); - - HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf()); - boolean doesTableExist = hAdmin.tableExists(tableName); - assertTrue(doesTableExist); - - populateHBaseTable(tableName, 5); - populateHBaseTableQualifier1(tableName, 6, false); - populateHBaseTableQualifier1(tableName, 7, false); - - Configuration conf = new Configuration(hcatConf); - conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, - HCatUtil.serialize(getHiveConf().getAllProperties())); - - Path outputDir = new Path(getTestDir(), "mapred/testHBaseTableIgnoreAbortedTransactions"); - FileSystem fs = getFileSystem(); - if (fs.exists(outputDir)) { - fs.delete(outputDir, true); - } - Job job = new Job(conf, "hbase-aborted-transaction"); - job.setJarByClass(this.getClass()); - job.setMapperClass(MapReadHTable.class); - MapReadHTable.resetCounters(); - job.setInputFormatClass(HCatInputFormat.class); - HCatInputFormat.setInput(job, MetaStoreUtils.DEFAULT_DATABASE_NAME, tableName); - job.setOutputFormatClass(TextOutputFormat.class); - TextOutputFormat.setOutputPath(job, outputDir); - job.setMapOutputKeyClass(BytesWritable.class); - job.setMapOutputValueClass(Text.class); - job.setOutputKeyClass(BytesWritable.class); - job.setOutputValueClass(Text.class); - job.setNumReduceTasks(0); - assertTrue(job.waitForCompletion(true)); - // Verify that the records do not contain aborted transaction - // revisions 6 and 7 for testFamily:testQualifier1 and - // fetches revision 5 for both testQualifier1 and testQualifier2 - assertFalse(MapReadHTable.error); - assertEquals(1, MapReadHTable.count); - - String dropTableQuery = "DROP TABLE " + tableName; - CommandProcessorResponse responseThree = hcatDriver.run(dropTableQuery); - assertEquals(0, 
responseThree.getResponseCode()); - - boolean isHbaseTableThere = hAdmin.tableExists(tableName); - assertFalse(isHbaseTableThere); - } - - @Test - public void TestHBaseTableIgnoreAbortedAndRunningTransactions() throws Exception { - String tableName = newTableName("mytable"); - String tableQuery = "CREATE TABLE " + tableName - + "(key string, testqualifier1 string, testqualifier2 string) STORED BY " + - "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" - + "TBLPROPERTIES ('hbase.columns.mapping'=':key," + - "testFamily:testQualifier1,testFamily:testQualifier2')"; - - CommandProcessorResponse responseTwo = hcatDriver.run(tableQuery); - assertEquals(0, responseTwo.getResponseCode()); - - HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf()); - boolean doesTableExist = hAdmin.tableExists(tableName); - assertTrue(doesTableExist); - - populateHBaseTable(tableName, 2); - populateHBaseTableQualifier1(tableName, 3, Boolean.TRUE); //Committed transaction - populateHBaseTableQualifier1(tableName, 4, null); //Running transaction - populateHBaseTableQualifier1(tableName, 5, Boolean.FALSE); //Aborted transaction - populateHBaseTableQualifier1(tableName, 6, Boolean.TRUE); //Committed transaction - populateHBaseTableQualifier1(tableName, 7, null); //Running Transaction - populateHBaseTableQualifier1(tableName, 8, Boolean.FALSE); //Aborted Transaction - - Configuration conf = new Configuration(hcatConf); - conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, - HCatUtil.serialize(getHiveConf().getAllProperties())); - - Path outputDir = new Path(getTestDir(), "mapred/testHBaseTableIgnoreAbortedTransactions"); - FileSystem fs = getFileSystem(); - if (fs.exists(outputDir)) { - fs.delete(outputDir, true); - } - Job job = new Job(conf, "hbase-running-aborted-transaction"); - job.setJarByClass(this.getClass()); - job.setMapperClass(MapReadHTableRunningAbort.class); - job.setInputFormatClass(HCatInputFormat.class); - HCatInputFormat.setInput(job, MetaStoreUtils.DEFAULT_DATABASE_NAME, tableName); - 
job.setOutputFormatClass(TextOutputFormat.class); - TextOutputFormat.setOutputPath(job, outputDir); - job.setMapOutputKeyClass(BytesWritable.class); - job.setMapOutputValueClass(Text.class); - job.setOutputKeyClass(BytesWritable.class); - job.setOutputValueClass(Text.class); - job.setNumReduceTasks(0); - assertTrue(job.waitForCompletion(true)); - // Verify that the records do not contain running and aborted transaction - // and it fetches revision 2 for testQualifier1 and testQualifier2 - assertFalse(MapReadHTableRunningAbort.error); - assertEquals(1, MapReadHTableRunningAbort.count); - - String dropTableQuery = "DROP TABLE " + tableName; - CommandProcessorResponse responseThree = hcatDriver.run(dropTableQuery); - assertEquals(0, responseThree.getResponseCode()); - - boolean isHbaseTableThere = hAdmin.tableExists(tableName); - assertFalse(isHbaseTableThere); - } - - - static class MapReadHTable - extends - Mapper, Text> { - - static boolean error = false; - static int count = 0; - - @Override - public void map(ImmutableBytesWritable key, HCatRecord value, - Context context) throws IOException, InterruptedException { - System.out.println("HCat record value" + value.toString()); - boolean correctValues = (value.size() == 3) - && (value.get(0).toString()).equalsIgnoreCase("testRow") - && (value.get(1).toString()).equalsIgnoreCase("textValue-5") - && (value.get(2).toString()).equalsIgnoreCase("textValue-5"); - - if (correctValues == false) { - error = true; - } - count++; - } - - public static void resetCounters() { - error = false; - count = 0; - } - } - - static class MapReadProjHTable - extends - Mapper, Text> { - - static boolean error = false; - static int count = 0; - - @Override - public void map(ImmutableBytesWritable key, HCatRecord value, - Context context) throws IOException, InterruptedException { - System.out.println("HCat record value" + value.toString()); - boolean correctValues = (value.size() == 2) - && 
(value.get(0).toString()).equalsIgnoreCase("testRow") - && (value.get(1).toString()).equalsIgnoreCase("textValue-5"); - - if (correctValues == false) { - error = true; - } - count++; - } - } - - static class MapReadProjectionHTable - implements org.apache.hadoop.mapred.Mapper, Text> { - - static boolean error = false; - static int count = 0; - - @Override - public void configure(JobConf job) { - } - - @Override - public void close() throws IOException { - } - - @Override - public void map(ImmutableBytesWritable key, Result result, - OutputCollector, Text> output, Reporter reporter) - throws IOException { - System.out.println("Result " + result.toString()); - List list = result.list(); - boolean correctValues = (list.size() == 1) - && (Bytes.toString(list.get(0).getRow())).equalsIgnoreCase("testRow") - && (Bytes.toString(list.get(0).getValue())).equalsIgnoreCase("textValue-5") - && (Bytes.toString(list.get(0).getFamily())).equalsIgnoreCase("testFamily") - && (Bytes.toString(list.get(0).getQualifier())).equalsIgnoreCase("testQualifier1"); - - if (correctValues == false) { - error = true; - } - count++; - } - } - - static class MapReadHTableRunningAbort - extends - Mapper, Text> { - - static boolean error = false; - static int count = 0; - - @Override - public void map(ImmutableBytesWritable key, HCatRecord value, - Context context) throws IOException, InterruptedException { - System.out.println("HCat record value" + value.toString()); - boolean correctValues = (value.size() == 3) - && (value.get(0).toString()).equalsIgnoreCase("testRow") - && (value.get(1).toString()).equalsIgnoreCase("textValue-3") - && (value.get(2).toString()).equalsIgnoreCase("textValue-2"); - - if (correctValues == false) { - error = true; - } - count++; - } - } - - private HCatSchema getProjectionSchema() throws HCatException { - - HCatSchema schema = new HCatSchema(new ArrayList()); - schema.append(new HCatFieldSchema("key", HCatFieldSchema.Type.STRING, - "")); - schema.append(new 
HCatFieldSchema("testqualifier1", - HCatFieldSchema.Type.STRING, "")); - return schema; - } - - -} diff --git hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/TestSnapshots.java hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/TestSnapshots.java deleted file mode 100644 index 579922d..0000000 --- hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/TestSnapshots.java +++ /dev/null @@ -1,141 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.hbase; - -import static org.junit.Assert.assertEquals; - -import java.net.URI; -import java.util.HashMap; -import java.util.Map; -import java.util.Properties; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.cli.CliSessionState; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; -import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hcatalog.cli.HCatDriver; -import org.apache.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzer; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.common.HCatUtil; -import org.apache.hcatalog.hbase.snapshot.TableSnapshot; -import org.apache.hcatalog.mapreduce.HCatInputFormat; -import org.apache.hcatalog.mapreduce.InputJobInfo; -import org.junit.Test; - -public class TestSnapshots extends SkeletonHBaseTest { - private static HiveConf hcatConf; - private static HCatDriver hcatDriver; - - public void Initialize() throws Exception { - hcatConf = getHiveConf(); - hcatConf.set(ConfVars.SEMANTIC_ANALYZER_HOOK.varname, - HCatSemanticAnalyzer.class.getName()); - URI fsuri = getFileSystem().getUri(); - Path whPath = new Path(fsuri.getScheme(), fsuri.getAuthority(), - getTestDir()); - hcatConf.set(HiveConf.ConfVars.HADOOPFS.varname, fsuri.toString()); - hcatConf.set(ConfVars.METASTOREWAREHOUSE.varname, whPath.toString()); - - //Add hbase properties - - for (Map.Entry el : getHbaseConf()) { - if (el.getKey().startsWith("hbase.")) { - hcatConf.set(el.getKey(), el.getValue()); - } - } - - SessionState.start(new CliSessionState(hcatConf)); - hcatDriver = new HCatDriver(); - - } - - @Test - public void TestSnapshotConversion() throws Exception { - Initialize(); - String tableName = newTableName("mytableOne"); - String databaseName = 
newTableName("mydatabase"); - String fullyQualTableName = databaseName + "." + tableName; - String db_dir = new Path(getTestDir(), "hbasedb").toString(); - String dbquery = "CREATE DATABASE IF NOT EXISTS " + databaseName + " LOCATION '" - + db_dir + "'"; - String tableQuery = "CREATE TABLE " + fullyQualTableName - + "(key string, value1 string, value2 string) STORED BY " + - "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" - + "TBLPROPERTIES ('hbase.columns.mapping'=':key,cf1:q1,cf2:q2')"; - - CommandProcessorResponse cmdResponse = hcatDriver.run(dbquery); - assertEquals(0, cmdResponse.getResponseCode()); - cmdResponse = hcatDriver.run(tableQuery); - assertEquals(0, cmdResponse.getResponseCode()); - - Configuration conf = new Configuration(hcatConf); - conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, - HCatUtil.serialize(getHiveConf().getAllProperties())); - Job job = new Job(conf); - Properties properties = new Properties(); - properties.setProperty(HBaseConstants.PROPERTY_TABLE_SNAPSHOT_KEY, "dummysnapshot"); - HCatInputFormat.setInput(job, databaseName, tableName).setProperties(properties); - String modifiedInputInfo = job.getConfiguration().get(HCatConstants.HCAT_KEY_JOB_INFO); - InputJobInfo inputInfo = (InputJobInfo) HCatUtil.deserialize(modifiedInputInfo); - - Map revMap = new HashMap(); - revMap.put("cf1", 3L); - revMap.put("cf2", 5L); - TableSnapshot hbaseSnapshot = new TableSnapshot(fullyQualTableName, revMap, -1); - HCatTableSnapshot hcatSnapshot = HBaseRevisionManagerUtil.convertSnapshot(hbaseSnapshot, inputInfo.getTableInfo()); - - assertEquals(hcatSnapshot.getRevision("value1"), 3); - assertEquals(hcatSnapshot.getRevision("value2"), 5); - - String dropTable = "DROP TABLE " + fullyQualTableName; - cmdResponse = hcatDriver.run(dropTable); - assertEquals(0, cmdResponse.getResponseCode()); - - tableName = newTableName("mytableTwo"); - fullyQualTableName = databaseName + "." 
+ tableName; - tableQuery = "CREATE TABLE " + fullyQualTableName - + "(key string, value1 string, value2 string) STORED BY " + - "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" - + "TBLPROPERTIES ('hbase.columns.mapping'=':key,cf1:q1,cf1:q2')"; - cmdResponse = hcatDriver.run(tableQuery); - assertEquals(0, cmdResponse.getResponseCode()); - revMap.clear(); - revMap.put("cf1", 3L); - hbaseSnapshot = new TableSnapshot(fullyQualTableName, revMap, -1); - HCatInputFormat.setInput(job, databaseName, tableName).setProperties(properties); - modifiedInputInfo = job.getConfiguration().get(HCatConstants.HCAT_KEY_JOB_INFO); - inputInfo = (InputJobInfo) HCatUtil.deserialize(modifiedInputInfo); - hcatSnapshot = HBaseRevisionManagerUtil.convertSnapshot(hbaseSnapshot, inputInfo.getTableInfo()); - assertEquals(hcatSnapshot.getRevision("value1"), 3); - assertEquals(hcatSnapshot.getRevision("value2"), 3); - - dropTable = "DROP TABLE " + fullyQualTableName; - cmdResponse = hcatDriver.run(dropTable); - assertEquals(0, cmdResponse.getResponseCode()); - - String dropDatabase = "DROP DATABASE IF EXISTS " + databaseName + "CASCADE"; - cmdResponse = hcatDriver.run(dropDatabase); - assertEquals(0, cmdResponse.getResponseCode()); - } -} diff --git hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/snapshot/IDGenClient.java hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/snapshot/IDGenClient.java deleted file mode 100644 index 353402b..0000000 --- hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/snapshot/IDGenClient.java +++ /dev/null @@ -1,72 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.hbase.snapshot; - -import java.util.HashMap; -import java.util.Map; -import java.util.Random; - -public class IDGenClient extends Thread { - - String connectionStr; - String base_dir; - ZKUtil zkutil; - Random sleepTime = new Random(); - int runtime; - HashMap idMap; - String tableName; - - IDGenClient(String connectionStr, String base_dir, int time, String tableName) { - super(); - this.connectionStr = connectionStr; - this.base_dir = base_dir; - this.zkutil = new ZKUtil(connectionStr, base_dir); - this.runtime = time; - idMap = new HashMap(); - this.tableName = tableName; - } - - /* - * @see java.lang.Runnable#run() - */ - @Override - public void run() { - long startTime = System.currentTimeMillis(); - int timeElapsed = 0; - while( timeElapsed <= runtime){ - try { - long id = zkutil.nextId(tableName); - idMap.put(System.currentTimeMillis(), id); - - int sTime = sleepTime.nextInt(2); - Thread.sleep(sTime * 100); - } catch (Exception e) { - e.printStackTrace(); - } - - timeElapsed = (int) Math.ceil((System.currentTimeMillis() - startTime)/(double)1000); - } - - } - - Map getIdMap(){ - return idMap; - } - -} diff --git hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/snapshot/TestIDGenerator.java hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/snapshot/TestIDGenerator.java deleted file mode 100644 index 9b0cd01..0000000 --- hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/snapshot/TestIDGenerator.java +++ /dev/null @@ -1,99 +0,0 @@ -/** - * Licensed to the 
Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.hbase.snapshot; - -import static org.junit.Assert.assertTrue; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; - -import org.apache.hcatalog.hbase.SkeletonHBaseTest; -import org.junit.Assert; -import org.junit.Test; - -public class TestIDGenerator extends SkeletonHBaseTest { - - @Test - public void testIDGeneration() throws Exception { - - int port = getHbaseConf().getInt("hbase.zookeeper.property.clientPort", 2181); - String servers = getHbaseConf().get("hbase.zookeeper.quorum"); - String[] splits = servers.split(","); - StringBuffer sb = new StringBuffer(); - for (String split : splits) { - sb.append(split); - sb.append(':'); - sb.append(port); - } - ZKUtil zkutil = new ZKUtil(sb.toString(), "/rm_base"); - - String tableName = "myTable"; - long initId = zkutil.nextId(tableName); - for (int i = 0; i < 10; i++) { - long id = zkutil.nextId(tableName); - Assert.assertEquals(initId + (i + 1), id); - } - } - - @Test - public void testMultipleClients() throws InterruptedException { - - int port = getHbaseConf().getInt("hbase.zookeeper.property.clientPort", 2181); - String servers = 
getHbaseConf().get("hbase.zookeeper.quorum"); - String[] splits = servers.split(","); - StringBuffer sb = new StringBuffer(); - for (String split : splits) { - sb.append(split); - sb.append(':'); - sb.append(port); - } - - ArrayList clients = new ArrayList(); - - for (int i = 0; i < 5; i++) { - IDGenClient idClient = new IDGenClient(sb.toString(), "/rm_base", 10, "testTable"); - clients.add(idClient); - } - - for (IDGenClient idClient : clients) { - idClient.run(); - } - - for (IDGenClient idClient : clients) { - idClient.join(); - } - - HashMap idMap = new HashMap(); - for (IDGenClient idClient : clients) { - idMap.putAll(idClient.getIdMap()); - } - - ArrayList keys = new ArrayList(idMap.keySet()); - Collections.sort(keys); - int startId = 1; - for (Long key : keys) { - Long id = idMap.get(key); - System.out.println("Key: " + key + " Value " + id); - assertTrue(id == startId); - startId++; - - } - } -} diff --git hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/snapshot/TestRevisionManager.java hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/snapshot/TestRevisionManager.java deleted file mode 100644 index 114895a..0000000 --- hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/snapshot/TestRevisionManager.java +++ /dev/null @@ -1,260 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.hbase.snapshot; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; - -import java.io.IOException; -import java.util.Arrays; -import java.util.List; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hcatalog.hbase.SkeletonHBaseTest; -import org.apache.hcatalog.hbase.snapshot.transaction.thrift.StoreFamilyRevision; -import org.apache.hcatalog.hbase.snapshot.transaction.thrift.StoreFamilyRevisionList; -import org.apache.zookeeper.KeeperException; -import org.apache.zookeeper.ZooKeeper; -import org.apache.zookeeper.data.Stat; -import org.junit.Test; - -public class TestRevisionManager extends SkeletonHBaseTest { - - @Test - public void testBasicZNodeCreation() throws IOException, KeeperException, InterruptedException { - - int port = getHbaseConf().getInt("hbase.zookeeper.property.clientPort", 2181); - String servers = getHbaseConf().get("hbase.zookeeper.quorum"); - String[] splits = servers.split(","); - StringBuffer sb = new StringBuffer(); - for (String split : splits) { - sb.append(split); - sb.append(':'); - sb.append(port); - } - - ZKUtil zkutil = new ZKUtil(sb.toString(), "/rm_base"); - String tableName = newTableName("testTable"); - List columnFamilies = Arrays.asList("cf001", "cf002", "cf003"); - - zkutil.createRootZNodes(); - ZooKeeper zk = zkutil.getSession(); - Stat tempTwo = zk.exists("/rm_base" + PathUtil.DATA_DIR, false); - assertTrue(tempTwo != null); - Stat tempThree = zk.exists("/rm_base" + PathUtil.CLOCK_NODE, false); - assertTrue(tempThree != 
null); - - zkutil.setUpZnodesForTable(tableName, columnFamilies); - String transactionDataTablePath = "/rm_base" + PathUtil.DATA_DIR + "/" + tableName; - Stat result = zk.exists(transactionDataTablePath, false); - assertTrue(result != null); - - for (String colFamiliy : columnFamilies) { - String cfPath = transactionDataTablePath + "/" + colFamiliy; - Stat resultTwo = zk.exists(cfPath, false); - assertTrue(resultTwo != null); - } - - } - - @Test - public void testCommitTransaction() throws IOException { - - int port = getHbaseConf().getInt("hbase.zookeeper.property.clientPort", 2181); - String servers = getHbaseConf().get("hbase.zookeeper.quorum"); - String[] splits = servers.split(","); - StringBuffer sb = new StringBuffer(); - for (String split : splits) { - sb.append(split); - sb.append(':'); - sb.append(port); - } - - Configuration conf = RevisionManagerConfiguration.create(getHbaseConf()); - conf.set(RMConstants.ZOOKEEPER_DATADIR, "/rm_base"); - ZKBasedRevisionManager manager = new ZKBasedRevisionManager(); - manager.initialize(conf); - manager.open(); - ZKUtil zkutil = new ZKUtil(sb.toString(), "/rm_base"); - - String tableName = newTableName("testTable"); - List columnFamilies = Arrays.asList("cf1", "cf2", "cf3"); - Transaction txn = manager.beginWriteTransaction(tableName, - columnFamilies); - - List cfs = zkutil.getColumnFamiliesOfTable(tableName); - assertTrue(cfs.size() == columnFamilies.size()); - for (String cf : cfs) { - assertTrue(columnFamilies.contains(cf)); - } - - for (String colFamily : columnFamilies) { - String path = PathUtil.getRunningTxnInfoPath("/rm_base", tableName, colFamily); - byte[] data = zkutil.getRawData(path, null); - StoreFamilyRevisionList list = new StoreFamilyRevisionList(); - ZKUtil.deserialize(list, data); - assertEquals(list.getRevisionListSize(), 1); - StoreFamilyRevision lightTxn = list.getRevisionList().get(0); - assertEquals(lightTxn.timestamp, txn.getTransactionExpireTimeStamp()); - assertEquals(lightTxn.revision, 
txn.getRevisionNumber()); - - } - manager.commitWriteTransaction(txn); - for (String colFamiliy : columnFamilies) { - String path = PathUtil.getRunningTxnInfoPath("/rm_base", tableName, colFamiliy); - byte[] data = zkutil.getRawData(path, null); - StoreFamilyRevisionList list = new StoreFamilyRevisionList(); - ZKUtil.deserialize(list, data); - assertEquals(list.getRevisionListSize(), 0); - - } - - manager.close(); - } - - @Test - public void testAbortTransaction() throws IOException { - - int port = getHbaseConf().getInt("hbase.zookeeper.property.clientPort", 2181); - String host = getHbaseConf().get("hbase.zookeeper.quorum"); - Configuration conf = RevisionManagerConfiguration.create(getHbaseConf()); - conf.set(RMConstants.ZOOKEEPER_DATADIR, "/rm_base"); - ZKBasedRevisionManager manager = new ZKBasedRevisionManager(); - manager.initialize(conf); - manager.open(); - ZKUtil zkutil = new ZKUtil(host + ':' + port, "/rm_base"); - - String tableName = newTableName("testTable"); - List columnFamilies = Arrays.asList("cf1", "cf2", "cf3"); - Transaction txn = manager.beginWriteTransaction(tableName, columnFamilies); - List cfs = zkutil.getColumnFamiliesOfTable(tableName); - - assertTrue(cfs.size() == columnFamilies.size()); - for (String cf : cfs) { - assertTrue(columnFamilies.contains(cf)); - } - - for (String colFamiliy : columnFamilies) { - String path = PathUtil.getRunningTxnInfoPath("/rm_base", tableName, colFamiliy); - byte[] data = zkutil.getRawData(path, null); - StoreFamilyRevisionList list = new StoreFamilyRevisionList(); - ZKUtil.deserialize(list, data); - assertEquals(list.getRevisionListSize(), 1); - StoreFamilyRevision lightTxn = list.getRevisionList().get(0); - assertEquals(lightTxn.timestamp, txn.getTransactionExpireTimeStamp()); - assertEquals(lightTxn.revision, txn.getRevisionNumber()); - - } - manager.abortWriteTransaction(txn); - for (String colFamiliy : columnFamilies) { - String path = PathUtil.getRunningTxnInfoPath("/rm_base", tableName, colFamiliy); 
- byte[] data = zkutil.getRawData(path, null); - StoreFamilyRevisionList list = new StoreFamilyRevisionList(); - ZKUtil.deserialize(list, data); - assertEquals(list.getRevisionListSize(), 0); - - } - - for (String colFamiliy : columnFamilies) { - String path = PathUtil.getAbortInformationPath("/rm_base", tableName, colFamiliy); - byte[] data = zkutil.getRawData(path, null); - StoreFamilyRevisionList list = new StoreFamilyRevisionList(); - ZKUtil.deserialize(list, data); - assertEquals(list.getRevisionListSize(), 1); - StoreFamilyRevision abortedTxn = list.getRevisionList().get(0); - assertEquals(abortedTxn.getRevision(), txn.getRevisionNumber()); - } - manager.close(); - } - - @Test - public void testKeepAliveTransaction() throws InterruptedException, IOException { - - int port = getHbaseConf().getInt("hbase.zookeeper.property.clientPort", 2181); - String servers = getHbaseConf().get("hbase.zookeeper.quorum"); - String[] splits = servers.split(","); - StringBuffer sb = new StringBuffer(); - for (String split : splits) { - sb.append(split); - sb.append(':'); - sb.append(port); - } - - Configuration conf = RevisionManagerConfiguration.create(getHbaseConf()); - conf.set(RMConstants.ZOOKEEPER_DATADIR, "/rm_base"); - ZKBasedRevisionManager manager = new ZKBasedRevisionManager(); - manager.initialize(conf); - manager.open(); - String tableName = newTableName("testTable"); - List columnFamilies = Arrays.asList("cf1", "cf2"); - Transaction txn = manager.beginWriteTransaction(tableName, - columnFamilies, 40); - Thread.sleep(100); - try { - manager.commitWriteTransaction(txn); - } catch (Exception e) { - assertTrue(e instanceof IOException); - assertEquals(e.getMessage(), - "The transaction to be removed not found in the data."); - } - - } - - @Test - public void testCreateSnapshot() throws IOException { - int port = getHbaseConf().getInt("hbase.zookeeper.property.clientPort", 2181); - String host = getHbaseConf().get("hbase.zookeeper.quorum"); - Configuration conf = 
RevisionManagerConfiguration.create(getHbaseConf()); - conf.set(RMConstants.ZOOKEEPER_DATADIR, "/rm_base"); - ZKBasedRevisionManager manager = new ZKBasedRevisionManager(); - manager.initialize(conf); - manager.open(); - String tableName = newTableName("testTable"); - List cfOne = Arrays.asList("cf1", "cf2"); - List cfTwo = Arrays.asList("cf2", "cf3"); - Transaction tsx1 = manager.beginWriteTransaction(tableName, cfOne); - Transaction tsx2 = manager.beginWriteTransaction(tableName, cfTwo); - TableSnapshot snapshotOne = manager.createSnapshot(tableName); - assertEquals(snapshotOne.getRevision("cf1"), 0); - assertEquals(snapshotOne.getRevision("cf2"), 0); - assertEquals(snapshotOne.getRevision("cf3"), 1); - - List cfThree = Arrays.asList("cf1", "cf3"); - Transaction tsx3 = manager.beginWriteTransaction(tableName, cfThree); - manager.commitWriteTransaction(tsx1); - TableSnapshot snapshotTwo = manager.createSnapshot(tableName); - assertEquals(snapshotTwo.getRevision("cf1"), 2); - assertEquals(snapshotTwo.getRevision("cf2"), 1); - assertEquals(snapshotTwo.getRevision("cf3"), 1); - - manager.commitWriteTransaction(tsx2); - TableSnapshot snapshotThree = manager.createSnapshot(tableName); - assertEquals(snapshotThree.getRevision("cf1"), 2); - assertEquals(snapshotThree.getRevision("cf2"), 3); - assertEquals(snapshotThree.getRevision("cf3"), 2); - manager.commitWriteTransaction(tsx3); - TableSnapshot snapshotFour = manager.createSnapshot(tableName); - assertEquals(snapshotFour.getRevision("cf1"), 3); - assertEquals(snapshotFour.getRevision("cf2"), 3); - assertEquals(snapshotFour.getRevision("cf3"), 3); - - } - - -} diff --git hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/snapshot/TestRevisionManagerConfiguration.java hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/snapshot/TestRevisionManagerConfiguration.java deleted file mode 100644 index 301bf92..0000000 --- 
hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/snapshot/TestRevisionManagerConfiguration.java +++ /dev/null @@ -1,34 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.hbase.snapshot; - -import org.apache.hadoop.conf.Configuration; -import org.junit.Assert; -import org.junit.Test; - -public class TestRevisionManagerConfiguration { - - @Test - public void testDefault() { - Configuration conf = RevisionManagerConfiguration.create(); - Assert.assertEquals("org.apache.hcatalog.hbase.snapshot.ZKBasedRevisionManager", - conf.get(RevisionManagerFactory.REVISION_MGR_IMPL_CLASS)); - } -} diff --git hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/snapshot/TestRevisionManagerEndpoint.java hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/snapshot/TestRevisionManagerEndpoint.java deleted file mode 100644 index fe9ca40..0000000 --- hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/snapshot/TestRevisionManagerEndpoint.java +++ /dev/null @@ -1,206 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.hbase.snapshot; - -import java.io.IOException; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; - -import org.apache.commons.lang.builder.ToStringBuilder; -import org.apache.commons.lang.builder.ToStringStyle; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hbase.coprocessor.CoprocessorHost; -import org.apache.hcatalog.hbase.SkeletonHBaseTest; -import org.junit.Assert; -import org.junit.Test; - -public class TestRevisionManagerEndpoint extends SkeletonHBaseTest { - - static { - // test case specific mini cluster settings - testConf = new Configuration(false); - testConf.setStrings(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY, - "org.apache.hcatalog.hbase.snapshot.RevisionManagerEndpoint", - "org.apache.hadoop.hbase.coprocessor.GenericEndpoint"); - testConf.set(RMConstants.REVISION_MGR_ENDPOINT_IMPL_CLASS, MockRM.class.getName()); - } - - /** - * Mock implementation to test the protocol/serialization - */ - public static class MockRM implements RevisionManager { - - private static class Invocation { - Invocation(String methodName, Object ret, Object... 
args) { - this.methodName = methodName; - this.args = args; - this.ret = ret; - } - - String methodName; - Object[] args; - Object ret; - - private static boolean equals(Object obj1, Object obj2) { - if (obj1 == obj2) return true; - if (obj1 == null || obj2 == null) return false; - if (obj1 instanceof Transaction || obj1 instanceof TableSnapshot) { - return obj1.toString().equals(obj2.toString()); - } - return obj1.equals(obj2); - } - - @Override - public boolean equals(Object obj) { - Invocation other = (Invocation) obj; - if (this == other) return true; - if (other == null) return false; - if (this.args != other.args) { - if (this.args == null || other.args == null) return false; - if (this.args.length != other.args.length) return false; - for (int i = 0; i < args.length; i++) { - if (!equals(this.args[i], other.args[i])) return false; - } - } - return equals(this.ret, other.ret); - } - - @Override - public String toString() { - return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE). - append("method", this.methodName). - append("args", this.args). - append("returns", this.ret). - toString(); - } - } - - final static String DEFAULT_INSTANCE = "default"; - final static Map INSTANCES = new ConcurrentHashMap(); - Invocation lastCall; - boolean isOpen = false; - - private T recordCall(T result, Object... 
args) { - StackTraceElement[] stackTrace = Thread.currentThread().getStackTrace(); - lastCall = new Invocation(stackTrace[2].getMethodName(), result, args); - return result; - } - - @Override - public void initialize(Configuration conf) { - if (!INSTANCES.containsKey(DEFAULT_INSTANCE)) - INSTANCES.put(DEFAULT_INSTANCE, this); - } - - @Override - public void open() throws IOException { - isOpen = true; - } - - @Override - public void close() throws IOException { - isOpen = false; - } - - @Override - public void createTable(String table, List columnFamilies) throws IOException { - } - - @Override - public void dropTable(String table) throws IOException { - } - - @Override - public Transaction beginWriteTransaction(String table, - List families) throws IOException { - return recordCall(null, table, families); - } - - @Override - public Transaction beginWriteTransaction(String table, - List families, long keepAlive) throws IOException { - return recordCall(null, table, families, keepAlive); - } - - @Override - public void commitWriteTransaction(Transaction transaction) - throws IOException { - } - - @Override - public void abortWriteTransaction(Transaction transaction) - throws IOException { - } - - @Override - public List getAbortedWriteTransactions(String table, - String columnFamily) throws IOException { - return null; - } - - @Override - public TableSnapshot createSnapshot(String tableName) - throws IOException { - return null; - } - - @Override - public TableSnapshot createSnapshot(String tableName, long revision) - throws IOException { - TableSnapshot ret = new TableSnapshot(tableName, new HashMap(), revision); - return recordCall(ret, tableName, revision); - } - - @Override - public void keepAlive(Transaction transaction) throws IOException { - recordCall(null, transaction); - } - } - - @Test - public void testRevisionManagerProtocol() throws Throwable { - - Configuration conf = getHbaseConf(); - RevisionManager rm = 
RevisionManagerFactory.getOpenedRevisionManager( - RevisionManagerEndpointClient.class.getName(), conf); - - MockRM mockImpl = MockRM.INSTANCES.get(MockRM.DEFAULT_INSTANCE); - Assert.assertNotNull(mockImpl); - Assert.assertTrue(mockImpl.isOpen); - - Transaction t = new Transaction("t1", Arrays.asList("f1", "f2"), 0, 0); - MockRM.Invocation call = new MockRM.Invocation("keepAlive", null, t); - rm.keepAlive(t); - Assert.assertEquals(call.methodName, call, mockImpl.lastCall); - - t = new Transaction("t2", Arrays.asList("f21", "f22"), 0, 0); - call = new MockRM.Invocation("beginWriteTransaction", null, t.getTableName(), t.getColumnFamilies()); - call.ret = rm.beginWriteTransaction(t.getTableName(), t.getColumnFamilies()); - Assert.assertEquals(call.methodName, call, mockImpl.lastCall); - - call = new MockRM.Invocation("createSnapshot", null, "t3", 1L); - call.ret = rm.createSnapshot("t3", 1); - Assert.assertEquals(call.methodName, call, mockImpl.lastCall); - - } - -} diff --git hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/snapshot/TestThriftSerialization.java hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/snapshot/TestThriftSerialization.java deleted file mode 100644 index e423f65..0000000 --- hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/snapshot/TestThriftSerialization.java +++ /dev/null @@ -1,85 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.hbase.snapshot; - -import static org.junit.Assert.assertTrue; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; - -import org.apache.hcatalog.hbase.snapshot.transaction.thrift.StoreFamilyRevision; -import org.apache.hcatalog.hbase.snapshot.transaction.thrift.StoreFamilyRevisionList; -import org.junit.Test; - -public class TestThriftSerialization { - - @Test - public void testLightWeightTransaction() { - StoreFamilyRevision trxn = new StoreFamilyRevision(0, 1000); - try { - - byte[] data = ZKUtil.serialize(trxn); - StoreFamilyRevision newWtx = new StoreFamilyRevision(); - ZKUtil.deserialize(newWtx, data); - - assertTrue(newWtx.getRevision() == trxn.getRevision()); - assertTrue(newWtx.getTimestamp() == trxn.getTimestamp()); - - } catch (IOException e) { - e.printStackTrace(); - } - } - - @Test - public void testWriteTransactionList() { - List txnList = new ArrayList(); - long version; - long timestamp; - for (int i = 0; i < 10; i++) { - version = i; - timestamp = 1000 + i; - StoreFamilyRevision wtx = new StoreFamilyRevision(version, timestamp); - txnList.add(wtx); - } - - StoreFamilyRevisionList wList = new StoreFamilyRevisionList(txnList); - - try { - byte[] data = ZKUtil.serialize(wList); - StoreFamilyRevisionList newList = new StoreFamilyRevisionList(); - ZKUtil.deserialize(newList, data); - assertTrue(newList.getRevisionListSize() == wList.getRevisionListSize()); - - Iterator itr = newList.getRevisionListIterator(); - int i = 0; - while (itr.hasNext()) 
{ - StoreFamilyRevision txn = itr.next(); - assertTrue(txn.getRevision() == i); - assertTrue(txn.getTimestamp() == (i + 1000)); - i++; - } - - } catch (IOException e) { - e.printStackTrace(); - } - } - -} diff --git hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/snapshot/TestZNodeSetUp.java hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/snapshot/TestZNodeSetUp.java deleted file mode 100644 index cb0e7cf..0000000 --- hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/snapshot/TestZNodeSetUp.java +++ /dev/null @@ -1,120 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hcatalog.hbase.snapshot; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; - -import java.net.URI; -import java.util.Map; - -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hbase.HBaseConfiguration; -import org.apache.hadoop.hbase.client.HBaseAdmin; -import org.apache.hadoop.hive.cli.CliSessionState; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; -import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hcatalog.cli.HCatDriver; -import org.apache.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzer; -import org.apache.hcatalog.hbase.SkeletonHBaseTest; -import org.apache.zookeeper.ZooKeeper; -import org.apache.zookeeper.data.Stat; -import org.junit.Test; - - -public class TestZNodeSetUp extends SkeletonHBaseTest { - - private static HiveConf hcatConf; - private static HCatDriver hcatDriver; - - public void Initialize() throws Exception { - - hcatConf = getHiveConf(); - hcatConf.set(ConfVars.SEMANTIC_ANALYZER_HOOK.varname, - HCatSemanticAnalyzer.class.getName()); - URI fsuri = getFileSystem().getUri(); - Path whPath = new Path(fsuri.getScheme(), fsuri.getAuthority(), - getTestDir()); - hcatConf.set(HiveConf.ConfVars.HADOOPFS.varname, fsuri.toString()); - hcatConf.set(ConfVars.METASTOREWAREHOUSE.varname, whPath.toString()); - - //Add hbase properties - - for (Map.Entry el : getHbaseConf()) { - if (el.getKey().startsWith("hbase.")) { - hcatConf.set(el.getKey(), el.getValue()); - } - } - HBaseConfiguration.merge(hcatConf, - RevisionManagerConfiguration.create()); - hcatConf.set(RMConstants.ZOOKEEPER_DATADIR, "/rm_base"); - SessionState.start(new CliSessionState(hcatConf)); - hcatDriver = new HCatDriver(); - - } - - @Test - public void testBasicZNodeCreation() throws Exception { - - Initialize(); - int port = 
getHbaseConf().getInt("hbase.zookeeper.property.clientPort", 2181); - String servers = getHbaseConf().get("hbase.zookeeper.quorum"); - String[] splits = servers.split(","); - StringBuffer sb = new StringBuffer(); - for (String split : splits) { - sb.append(split); - sb.append(':'); - sb.append(port); - } - - hcatDriver.run("drop table test_table"); - CommandProcessorResponse response = hcatDriver - .run("create table test_table(key int, value string) STORED BY " + - "'org.apache.hcatalog.hbase.HBaseHCatStorageHandler'" - + "TBLPROPERTIES ('hbase.columns.mapping'=':key,cf1:val')"); - - assertEquals(0, response.getResponseCode()); - - HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf()); - boolean doesTableExist = hAdmin.tableExists("test_table"); - assertTrue(doesTableExist); - - - ZKUtil zkutil = new ZKUtil(sb.toString(), "/rm_base"); - ZooKeeper zk = zkutil.getSession(); - String tablePath = PathUtil.getTxnDataPath("/rm_base", "test_table"); - Stat tempTwo = zk.exists(tablePath, false); - assertTrue(tempTwo != null); - - String cfPath = PathUtil.getTxnDataPath("/rm_base", "test_table") + "/cf1"; - Stat tempThree = zk.exists(cfPath, false); - assertTrue(tempThree != null); - - hcatDriver.run("drop table test_table"); - - System.out.println("Table path : " + tablePath); - Stat tempFour = zk.exists(tablePath, false); - assertTrue(tempFour == null); - - } - -} diff --git hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/snapshot/lock/TestWriteLock.java hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/snapshot/lock/TestWriteLock.java deleted file mode 100644 index c03a00b..0000000 --- hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/snapshot/lock/TestWriteLock.java +++ /dev/null @@ -1,161 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.hbase.snapshot.lock; - -import org.apache.zookeeper.ZooKeeper; -import org.apache.zookeeper.test.ClientBase; - -import java.util.concurrent.CountDownLatch; -import java.util.concurrent.TimeUnit; - -import org.junit.After; -import org.junit.Assert; -import org.junit.Test; - -/** - * test for writelock - * This class is taken from the zookeeper 3.4.0 as-is to test the zookeeper lock - * Recipe with a change in the package name. 
- */ -public class TestWriteLock extends ClientBase { - protected int sessionTimeout = 10 * 1000; - protected String dir = "/" + getClass().getName(); - protected WriteLock[] nodes; - protected CountDownLatch latch = new CountDownLatch(1); - private boolean restartServer = true; - private boolean workAroundClosingLastZNodeFails = true; - private boolean killLeader = true; - - @Test - public void testRun() throws Exception { - runTest(3); - } - - class LockCallback implements LockListener { - public void lockAcquired() { - latch.countDown(); - } - - public void lockReleased() { - - } - - } - - protected void runTest(int count) throws Exception { - nodes = new WriteLock[count]; - for (int i = 0; i < count; i++) { - ZooKeeper keeper = createClient(); - WriteLock leader = new WriteLock(keeper, dir, null); - leader.setLockListener(new LockCallback()); - nodes[i] = leader; - - leader.lock(); - } - - // lets wait for any previous leaders to die and one of our new - // nodes to become the new leader - latch.await(30, TimeUnit.SECONDS); - - WriteLock first = nodes[0]; - dumpNodes(count); - - // lets assert that the first election is the leader - Assert.assertTrue("The first znode should be the leader " + first.getId(), first.isOwner()); - - for (int i = 1; i < count; i++) { - WriteLock node = nodes[i]; - Assert.assertFalse("Node should not be the leader " + node.getId(), node.isOwner()); - } - - if (count > 1) { - if (killLeader) { - System.out.println("Now killing the leader"); - // now lets kill the leader - latch = new CountDownLatch(1); - first.unlock(); - latch.await(30, TimeUnit.SECONDS); - //Thread.sleep(10000); - WriteLock second = nodes[1]; - dumpNodes(count); - // lets assert that the first election is the leader - Assert.assertTrue("The second znode should be the leader " + second.getId(), second.isOwner()); - - for (int i = 2; i < count; i++) { - WriteLock node = nodes[i]; - Assert.assertFalse("Node should not be the leader " + node.getId(), node.isOwner()); - } 
- } - - - if (restartServer) { - // now lets stop the server - System.out.println("Now stopping the server"); - stopServer(); - Thread.sleep(10000); - - // TODO lets assert that we are no longer the leader - dumpNodes(count); - - System.out.println("Starting the server"); - startServer(); - Thread.sleep(10000); - - for (int i = 0; i < count - 1; i++) { - System.out.println("Calling acquire for node: " + i); - nodes[i].lock(); - } - dumpNodes(count); - System.out.println("Now closing down..."); - } - } - } - - protected void dumpNodes(int count) { - for (int i = 0; i < count; i++) { - WriteLock node = nodes[i]; - System.out.println("node: " + i + " id: " + - node.getId() + " is leader: " + node.isOwner()); - } - } - - @After - public void tearDown() throws Exception { - if (nodes != null) { - for (int i = 0; i < nodes.length; i++) { - WriteLock node = nodes[i]; - if (node != null) { - System.out.println("Closing node: " + i); - node.close(); - if (workAroundClosingLastZNodeFails && i == nodes.length - 1) { - System.out.println("Not closing zookeeper: " + i + " due to bug!"); - } else { - System.out.println("Closing zookeeper: " + i); - node.getZookeeper().close(); - System.out.println("Closed zookeeper: " + i); - } - } - } - } - System.out.println("Now lets stop the server"); - super.tearDown(); - - } -} diff --git hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/snapshot/lock/TestZNodeName.java hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/snapshot/lock/TestZNodeName.java deleted file mode 100644 index a39c691..0000000 --- hcatalog/storage-handlers/hbase/src/test/org/apache/hcatalog/hbase/snapshot/lock/TestZNodeName.java +++ /dev/null @@ -1,62 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.hbase.snapshot.lock; - -import junit.framework.TestCase; - -import java.util.SortedSet; -import java.util.TreeSet; - -import org.junit.Test; - -/** - * test for znodenames. This class is taken as-is from zookeeper lock recipe test. - * The package name has been changed. - */ -public class TestZNodeName extends TestCase { - @Test - public void testOrderWithSamePrefix() throws Exception { - String[] names = { "x-3", "x-5", "x-11", "x-1" }; - String[] expected = { "x-1", "x-3", "x-5", "x-11" }; - assertOrderedNodeNames(names, expected); - } - @Test - public void testOrderWithDifferentPrefixes() throws Exception { - String[] names = { "r-3", "r-2", "r-1", "w-2", "w-1" }; - String[] expected = { "r-1", "r-2", "r-3", "w-1", "w-2" }; - assertOrderedNodeNames(names, expected); - } - - protected void assertOrderedNodeNames(String[] names, String[] expected) { - int size = names.length; - assertEquals("The two arrays should be the same size!", names.length, expected.length); - SortedSet nodeNames = new TreeSet(); - for (String name : names) { - nodeNames.add(new ZNodeName(name)); - } - - int index = 0; - for (ZNodeName nodeName : nodeNames) { - String name = nodeName.getName(); - assertEquals("Node " + index, expected[index++], name); - } - } - -} diff --git hcatalog/storage-handlers/hbase/src/test/org/apache/hive/hcatalog/hbase/ManyMiniCluster.java 
hcatalog/storage-handlers/hbase/src/test/org/apache/hive/hcatalog/hbase/ManyMiniCluster.java new file mode 100644 index 0000000..d89ec25 --- /dev/null +++ hcatalog/storage-handlers/hbase/src/test/org/apache/hive/hcatalog/hbase/ManyMiniCluster.java @@ -0,0 +1,370 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hive.hcatalog.hbase; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FileUtil; +import org.apache.hadoop.hbase.HBaseConfiguration; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.MiniHBaseCluster; +import org.apache.hadoop.hbase.client.HConnectionManager; +import org.apache.hadoop.hbase.client.HTable; +import org.apache.hadoop.hbase.zookeeper.MiniZooKeeperCluster; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.MiniMRCluster; + +import java.io.File; +import java.io.IOException; +import java.net.ServerSocket; + +/** + * MiniCluster class composed of a number of Hadoop Minicluster implementations + * and other necessary daemons needed for testing (HBase, Hive MetaStore, Zookeeper, MiniMRCluster) + */ +public class ManyMiniCluster { + + //MR stuff + private boolean miniMRClusterEnabled; + private MiniMRCluster mrCluster; + private int numTaskTrackers; + private JobConf jobConf; + + //HBase stuff + private boolean miniHBaseClusterEnabled; + private MiniHBaseCluster hbaseCluster; + private String hbaseRoot; + private Configuration hbaseConf; + private String hbaseDir; + + //ZK Stuff + private boolean miniZookeeperClusterEnabled; + private MiniZooKeeperCluster zookeeperCluster; + private int zookeeperPort; + private String zookeeperDir; + + //DFS Stuff + private MiniDFSCluster dfsCluster; + + //Hive Stuff + private boolean miniHiveMetastoreEnabled; + private HiveConf hiveConf; + private HiveMetaStoreClient hiveMetaStoreClient; + + private final File workDir; + private boolean started = false; + + + /** + * create a cluster instance using a builder which will expose configurable options + * @param workDir working directory ManyMiniCluster will use for all of it's 
*Minicluster instances + * @return a Builder instance + */ + public static Builder create(File workDir) { + return new Builder(workDir); + } + + private ManyMiniCluster(Builder b) { + workDir = b.workDir; + numTaskTrackers = b.numTaskTrackers; + hiveConf = b.hiveConf; + jobConf = b.jobConf; + hbaseConf = b.hbaseConf; + miniMRClusterEnabled = b.miniMRClusterEnabled; + miniHBaseClusterEnabled = b.miniHBaseClusterEnabled; + miniHiveMetastoreEnabled = b.miniHiveMetastoreEnabled; + miniZookeeperClusterEnabled = b.miniZookeeperClusterEnabled; + } + + protected synchronized void start() { + try { + if (!started) { + FileUtil.fullyDelete(workDir); + if (miniMRClusterEnabled) { + setupMRCluster(); + } + if (miniZookeeperClusterEnabled || miniHBaseClusterEnabled) { + miniZookeeperClusterEnabled = true; + setupZookeeper(); + } + if (miniHBaseClusterEnabled) { + setupHBaseCluster(); + } + if (miniHiveMetastoreEnabled) { + setUpMetastore(); + } + } + } catch (Exception e) { + throw new IllegalStateException("Failed to setup cluster", e); + } + } + + protected synchronized void stop() { + if (hbaseCluster != null) { + HConnectionManager.deleteAllConnections(true); + try { + hbaseCluster.shutdown(); + } catch (Exception e) { + e.printStackTrace(); + } + hbaseCluster = null; + } + if (zookeeperCluster != null) { + try { + zookeeperCluster.shutdown(); + } catch (Exception e) { + e.printStackTrace(); + } + zookeeperCluster = null; + } + if (mrCluster != null) { + try { + mrCluster.shutdown(); + } catch (Exception e) { + e.printStackTrace(); + } + mrCluster = null; + } + if (dfsCluster != null) { + try { + dfsCluster.getFileSystem().close(); + dfsCluster.shutdown(); + } catch (Exception e) { + e.printStackTrace(); + } + dfsCluster = null; + } + try { + FileSystem.closeAll(); + } catch (IOException e) { + e.printStackTrace(); + } + started = false; + } + + /** + * @return Configuration of mini HBase cluster + */ + public Configuration getHBaseConf() { + return 
HBaseConfiguration.create(hbaseConf); + } + + /** + * @return Configuration of mini MR cluster + */ + public Configuration getJobConf() { + return new Configuration(jobConf); + } + + /** + * @return Configuration of Hive Metastore, this is a standalone not a daemon + */ + public HiveConf getHiveConf() { + return new HiveConf(hiveConf); + } + + /** + * @return Filesystem used by MiniMRCluster and MiniHBaseCluster + */ + public FileSystem getFileSystem() { + try { + return FileSystem.get(jobConf); + } catch (IOException e) { + throw new IllegalStateException("Failed to get FileSystem", e); + } + } + + /** + * @return Metastore client instance + */ + public HiveMetaStoreClient getHiveMetaStoreClient() { + return hiveMetaStoreClient; + } + + private void setupMRCluster() { + try { + final int jobTrackerPort = findFreePort(); + final int taskTrackerPort = findFreePort(); + + if (jobConf == null) + jobConf = new JobConf(); + + jobConf.setInt("mapred.submit.replication", 1); + jobConf.set("yarn.scheduler.capacity.root.queues", "default"); + jobConf.set("yarn.scheduler.capacity.root.default.capacity", "100"); + //conf.set("hadoop.job.history.location",new File(workDir).getAbsolutePath()+"/history"); + System.setProperty("hadoop.log.dir", new File(workDir, "/logs").getAbsolutePath()); + + mrCluster = new MiniMRCluster(jobTrackerPort, + taskTrackerPort, + numTaskTrackers, + getFileSystem().getUri().toString(), + numTaskTrackers, + null, + null, + null, + jobConf); + + jobConf = mrCluster.createJobConf(); + } catch (IOException e) { + throw new IllegalStateException("Failed to Setup MR Cluster", e); + } + } + + private void setupZookeeper() { + try { + zookeeperDir = new File(workDir, "zk").getAbsolutePath(); + zookeeperPort = findFreePort(); + zookeeperCluster = new MiniZooKeeperCluster(); + zookeeperCluster.setDefaultClientPort(zookeeperPort); + zookeeperCluster.startup(new File(zookeeperDir)); + } catch (Exception e) { + throw new IllegalStateException("Failed to Setup 
Zookeeper Cluster", e); + } + } + + private void setupHBaseCluster() { + final int numRegionServers = 1; + + try { + hbaseDir = new File(workDir, "hbase").toString(); + hbaseDir = hbaseDir.replaceAll("\\\\", "/"); + hbaseRoot = "file://" + hbaseDir; + + if (hbaseConf == null) + hbaseConf = HBaseConfiguration.create(); + + hbaseConf.set("hbase.rootdir", hbaseRoot); + hbaseConf.set("hbase.master", "local"); + hbaseConf.setInt(HConstants.ZOOKEEPER_CLIENT_PORT, zookeeperPort); + hbaseConf.set(HConstants.ZOOKEEPER_QUORUM, "127.0.0.1"); + hbaseConf.setInt("hbase.master.port", findFreePort()); + hbaseConf.setInt("hbase.master.info.port", -1); + hbaseConf.setInt("hbase.regionserver.port", findFreePort()); + hbaseConf.setInt("hbase.regionserver.info.port", -1); + + hbaseCluster = new MiniHBaseCluster(hbaseConf, numRegionServers); + hbaseConf.set("hbase.master", hbaseCluster.getMaster().getServerName().getHostAndPort()); + //opening the META table ensures that cluster is running + new HTable(hbaseConf, HConstants.META_TABLE_NAME); + } catch (Exception e) { + throw new IllegalStateException("Failed to setup HBase Cluster", e); + } + } + + private void setUpMetastore() throws Exception { + if (hiveConf == null) + hiveConf = new HiveConf(this.getClass()); + + //The default org.apache.hadoop.hive.ql.hooks.PreExecutePrinter hook + //is present only in the ql/test directory + hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); + hiveConf.set(HiveConf.ConfVars.METASTORECONNECTURLKEY.varname, + "jdbc:derby:" + new File(workDir + "/metastore_db") + ";create=true"); + hiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.toString(), + new File(workDir, "warehouse").toString()); + //set where derby logs + File derbyLogFile = new File(workDir + "/derby.log"); + derbyLogFile.createNewFile(); + System.setProperty("derby.stream.error.file", 
derbyLogFile.getPath()); + + +// Driver driver = new Driver(hiveConf); +// SessionState.start(new CliSessionState(hiveConf)); + + hiveMetaStoreClient = new HiveMetaStoreClient(hiveConf); + } + + private static int findFreePort() throws IOException { + ServerSocket server = new ServerSocket(0); + int port = server.getLocalPort(); + server.close(); + return port; + } + + public static class Builder { + private File workDir; + private int numTaskTrackers = 1; + private JobConf jobConf; + private Configuration hbaseConf; + private HiveConf hiveConf; + + private boolean miniMRClusterEnabled = true; + private boolean miniHBaseClusterEnabled = true; + private boolean miniHiveMetastoreEnabled = true; + private boolean miniZookeeperClusterEnabled = true; + + + private Builder(File workDir) { + this.workDir = workDir; + } + + public Builder numTaskTrackers(int num) { + numTaskTrackers = num; + return this; + } + + public Builder jobConf(JobConf jobConf) { + this.jobConf = jobConf; + return this; + } + + public Builder hbaseConf(Configuration hbaseConf) { + this.hbaseConf = hbaseConf; + return this; + } + + public Builder hiveConf(HiveConf hiveConf) { + this.hiveConf = hiveConf; + return this; + } + + public Builder miniMRClusterEnabled(boolean enabled) { + this.miniMRClusterEnabled = enabled; + return this; + } + + public Builder miniHBaseClusterEnabled(boolean enabled) { + this.miniHBaseClusterEnabled = enabled; + return this; + } + + public Builder miniZookeeperClusterEnabled(boolean enabled) { + this.miniZookeeperClusterEnabled = enabled; + return this; + } + + public Builder miniHiveMetastoreEnabled(boolean enabled) { + this.miniHiveMetastoreEnabled = enabled; + return this; + } + + + public ManyMiniCluster build() { + return new ManyMiniCluster(this); + } + + } +} diff --git hcatalog/storage-handlers/hbase/src/test/org/apache/hive/hcatalog/hbase/SkeletonHBaseTest.java hcatalog/storage-handlers/hbase/src/test/org/apache/hive/hcatalog/hbase/SkeletonHBaseTest.java new file 
mode 100644 index 0000000..9495549 --- /dev/null +++ hcatalog/storage-handlers/hbase/src/test/org/apache/hive/hcatalog/hbase/SkeletonHBaseTest.java @@ -0,0 +1,237 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.hbase; + +import java.io.File; +import java.io.IOException; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Random; +import java.util.Set; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HBaseConfiguration; +import org.apache.hadoop.hbase.HColumnDescriptor; +import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.client.HBaseAdmin; +import org.apache.hadoop.hive.conf.HiveConf; +import org.junit.AfterClass; +import org.junit.BeforeClass; + +/** + * Base class for HBase Tests which need a mini cluster instance + */ +public abstract class SkeletonHBaseTest { + + protected static String TEST_DIR = "/tmp/build/test/data/"; + + protected final static String DEFAULT_CONTEXT_HANDLE = "default"; + + protected static Map contextMap = new HashMap(); + protected static Set tableNames = new 
HashSet(); + + /** + * Allow tests to alter the default MiniCluster configuration. + * (requires static initializer block as all setup here is static) + */ + protected static Configuration testConf = null; + + protected void createTable(String tableName, String[] families) { + try { + HBaseAdmin admin = new HBaseAdmin(getHbaseConf()); + HTableDescriptor tableDesc = new HTableDescriptor(tableName); + for (String family : families) { + HColumnDescriptor columnDescriptor = new HColumnDescriptor(family); + tableDesc.addFamily(columnDescriptor); + } + admin.createTable(tableDesc); + } catch (Exception e) { + e.printStackTrace(); + throw new IllegalStateException(e); + } + + } + + protected String newTableName(String prefix) { + String name = null; + int tries = 100; + do { + name = prefix + "_" + Math.abs(new Random().nextLong()); + } while (tableNames.contains(name) && --tries > 0); + if (tableNames.contains(name)) + throw new IllegalStateException("Couldn't find a unique table name, tableNames size: " + tableNames.size()); + tableNames.add(name); + return name; + } + + + /** + * startup an hbase cluster instance before a test suite runs + */ + @BeforeClass + public static void setup() { + if (!contextMap.containsKey(getContextHandle())) + contextMap.put(getContextHandle(), new Context(getContextHandle())); + + contextMap.get(getContextHandle()).start(); + } + + /** + * shutdown an hbase cluster instance ant the end of the test suite + */ + @AfterClass + public static void tearDown() { + contextMap.get(getContextHandle()).stop(); + } + + /** + * override this with a different context handle if tests suites are run simultaneously + * and ManyMiniCluster instances shouldn't be shared + * @return + */ + public static String getContextHandle() { + return DEFAULT_CONTEXT_HANDLE; + } + + /** + * @return working directory for a given test context, which normally is a test suite + */ + public String getTestDir() { + return contextMap.get(getContextHandle()).getTestDir(); + } + 
+ /** + * @return ManyMiniCluster instance + */ + public ManyMiniCluster getCluster() { + return contextMap.get(getContextHandle()).getCluster(); + } + + /** + * @return configuration of MiniHBaseCluster + */ + public Configuration getHbaseConf() { + return contextMap.get(getContextHandle()).getHbaseConf(); + } + + /** + * @return configuration of MiniMRCluster + */ + public Configuration getJobConf() { + return contextMap.get(getContextHandle()).getJobConf(); + } + + /** + * @return configuration of Hive Metastore + */ + public HiveConf getHiveConf() { + return contextMap.get(getContextHandle()).getHiveConf(); + } + + /** + * @return filesystem used by ManyMiniCluster daemons + */ + public FileSystem getFileSystem() { + return contextMap.get(getContextHandle()).getFileSystem(); + } + + /** + * class used to encapsulate a context which is normally used by + * a single TestSuite or across TestSuites when multi-threaded testing is turned on + */ + public static class Context { + protected String testDir; + protected ManyMiniCluster cluster; + + protected Configuration hbaseConf; + protected Configuration jobConf; + protected HiveConf hiveConf; + + protected FileSystem fileSystem; + + protected int usageCount = 0; + + public Context(String handle) { + testDir = new File(TEST_DIR + "/test_" + handle + "_" + Math.abs(new Random().nextLong()) + "/").getPath(); + System.out.println("Cluster work directory: " + testDir); + } + + public void start() { + if (usageCount++ == 0) { + ManyMiniCluster.Builder b = ManyMiniCluster.create(new File(testDir)); + if (testConf != null) { + b.hbaseConf(HBaseConfiguration.create(testConf)); + } + cluster = b.build(); + cluster.start(); + this.hbaseConf = cluster.getHBaseConf(); + jobConf = cluster.getJobConf(); + fileSystem = cluster.getFileSystem(); + hiveConf = cluster.getHiveConf(); + } + } + + public void stop() { + if (--usageCount == 0) { + try { + cluster.stop(); + cluster = null; + } finally { + System.out.println("Trying to 
cleanup: " + testDir); + try { + FileSystem fs = FileSystem.get(jobConf); + fs.delete(new Path(testDir), true); + } catch (IOException e) { + throw new IllegalStateException("Failed to cleanup test dir", e); + } + + } + } + } + + public String getTestDir() { + return testDir; + } + + public ManyMiniCluster getCluster() { + return cluster; + } + + public Configuration getHbaseConf() { + return hbaseConf; + } + + public Configuration getJobConf() { + return jobConf; + } + + public HiveConf getHiveConf() { + return hiveConf; + } + + public FileSystem getFileSystem() { + return fileSystem; + } + } + +} diff --git hcatalog/storage-handlers/hbase/src/test/org/apache/hive/hcatalog/hbase/TestHBaseBulkOutputFormat.java hcatalog/storage-handlers/hbase/src/test/org/apache/hive/hcatalog/hbase/TestHBaseBulkOutputFormat.java new file mode 100644 index 0000000..df328ef --- /dev/null +++ hcatalog/storage-handlers/hbase/src/test/org/apache/hive/hcatalog/hbase/TestHBaseBulkOutputFormat.java @@ -0,0 +1,631 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hive.hcatalog.hbase; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HBaseConfiguration; +import org.apache.hadoop.hbase.client.HTable; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.client.Result; +import org.apache.hadoop.hbase.client.ResultScanner; +import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hive.cli.CliSessionState; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapred.JobClient; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.OutputCollector; +import org.apache.hadoop.mapred.Reporter; +import org.apache.hadoop.mapred.RunningJob; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; +import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; +import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; +import org.apache.hive.hcatalog.cli.HCatDriver; +import org.apache.hive.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzer; +import org.apache.hive.hcatalog.common.HCatConstants; +import org.apache.hive.hcatalog.common.HCatUtil; +import org.apache.hive.hcatalog.data.DefaultHCatRecord; +import org.apache.hive.hcatalog.data.HCatRecord; +import org.apache.hive.hcatalog.data.schema.HCatSchema; +import org.apache.hive.hcatalog.hbase.HBaseBulkOutputFormat.HBaseBulkOutputCommitter; +import org.apache.hive.hcatalog.hbase.TestHBaseDirectOutputFormat.MapReadAbortedTransaction; +import 
org.apache.hive.hcatalog.hbase.TestHBaseDirectOutputFormat.MapWriteAbortTransaction; +import org.apache.hive.hcatalog.hbase.snapshot.FamilyRevision; +import org.apache.hive.hcatalog.hbase.snapshot.RevisionManager; +import org.apache.hive.hcatalog.hbase.snapshot.RevisionManagerConfiguration; +import org.apache.hive.hcatalog.hbase.snapshot.TableSnapshot; +import org.apache.hive.hcatalog.hbase.snapshot.Transaction; +import org.apache.hive.hcatalog.mapreduce.HCatInputFormat; +import org.apache.hive.hcatalog.mapreduce.HCatOutputFormat; +import org.apache.hive.hcatalog.mapreduce.OutputJobInfo; + +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.Arrays; +import java.util.List; +import java.util.Map; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +/** + * Tests components of HBaseHCatStorageHandler using ManyMiniCluster. + * Including ImprtSequenceFile and HBaseBulkOutputFormat + */ +public class TestHBaseBulkOutputFormat extends SkeletonHBaseTest { + private final static Logger LOG = LoggerFactory.getLogger(TestHBaseBulkOutputFormat.class); + + private final HiveConf allConf; + private final HCatDriver hcatDriver; + + public TestHBaseBulkOutputFormat() { + allConf = getHiveConf(); + allConf.set(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname, + HCatSemanticAnalyzer.class.getName()); + allConf.set(HiveConf.ConfVars.HADOOPFS.varname, getFileSystem().getUri().toString()); + allConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, new Path(getTestDir(), "warehouse").toString()); + + //Add hbase properties + for (Map.Entry el : getHbaseConf()) + allConf.set(el.getKey(), el.getValue()); + for (Map.Entry el : getJobConf()) + allConf.set(el.getKey(), el.getValue()); + + HBaseConfiguration.merge( + allConf, + RevisionManagerConfiguration.create()); + SessionState.start(new CliSessionState(allConf)); + 
hcatDriver = new HCatDriver(); + } + + public static class MapWriteOldMapper implements org.apache.hadoop.mapred.Mapper { + + @Override + public void close() throws IOException { + } + + @Override + public void configure(JobConf job) { + } + + @Override + public void map(LongWritable key, Text value, + OutputCollector output, + Reporter reporter) throws IOException { + String vals[] = value.toString().split(","); + Put put = new Put(Bytes.toBytes(vals[0])); + for (int i = 1; i < vals.length; i++) { + String pair[] = vals[i].split(":"); + put.add(Bytes.toBytes("my_family"), + Bytes.toBytes(pair[0]), + Bytes.toBytes(pair[1])); + } + output.collect(new ImmutableBytesWritable(Bytes.toBytes(vals[0])), put); + } + + } + + public static class MapWrite extends Mapper { + + @Override + public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { + String vals[] = value.toString().split(","); + Put put = new Put(Bytes.toBytes(vals[0])); + for (int i = 1; i < vals.length; i++) { + String pair[] = vals[i].split(":"); + put.add(Bytes.toBytes("my_family"), + Bytes.toBytes(pair[0]), + Bytes.toBytes(pair[1])); + } + context.write(new ImmutableBytesWritable(Bytes.toBytes(vals[0])), put); + } + } + + public static class MapHCatWrite extends Mapper { + @Override + public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { + OutputJobInfo jobInfo = (OutputJobInfo) HCatUtil.deserialize(context.getConfiguration().get(HCatConstants.HCAT_KEY_OUTPUT_INFO)); + HCatRecord record = new DefaultHCatRecord(3); + HCatSchema schema = jobInfo.getOutputSchema(); + String vals[] = value.toString().split(","); + record.setInteger("key", schema, Integer.parseInt(vals[0])); + for (int i = 1; i < vals.length; i++) { + String pair[] = vals[i].split(":"); + record.set(pair[0], schema, pair[1]); + } + context.write(null, record); + } + } + + @Test + public void hbaseBulkOutputFormatTest() throws IOException, 
ClassNotFoundException, InterruptedException { + String testName = "hbaseBulkOutputFormatTest"; + Path methodTestDir = new Path(getTestDir(), testName); + LOG.info("starting: " + testName); + + String tableName = newTableName(testName).toLowerCase(); + String familyName = "my_family"; + byte[] familyNameBytes = Bytes.toBytes(familyName); + + //include hbase config in conf file + Configuration conf = new Configuration(allConf); + + //create table + conf.set(HBaseConstants.PROPERTY_OUTPUT_TABLE_NAME_KEY, tableName); + conf.set("yarn.scheduler.capacity.root.queues", "default"); + conf.set("yarn.scheduler.capacity.root.default.capacity", "100"); + createTable(tableName, new String[]{familyName}); + + String data[] = {"1,english:one,spanish:uno", + "2,english:two,spanish:dos", + "3,english:three,spanish:tres"}; + + + // input/output settings + Path inputPath = new Path(methodTestDir, "mr_input"); + FSDataOutputStream os = getFileSystem().create(new Path(inputPath, "inputFile.txt")); + for (String line : data) + os.write(Bytes.toBytes(line + "\n")); + os.close(); + Path interPath = new Path(methodTestDir, "inter"); + //create job + JobConf job = new JobConf(conf); + job.setWorkingDirectory(new Path(methodTestDir, "mr_work")); + job.setJarByClass(this.getClass()); + job.setMapperClass(MapWriteOldMapper.class); + + job.setInputFormat(org.apache.hadoop.mapred.TextInputFormat.class); + org.apache.hadoop.mapred.TextInputFormat.setInputPaths(job, inputPath); + + job.setOutputFormat(HBaseBulkOutputFormat.class); + org.apache.hadoop.mapred.SequenceFileOutputFormat.setOutputPath(job, interPath); + job.setOutputCommitter(HBaseBulkOutputCommitter.class); + + //manually create transaction + RevisionManager rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(conf); + try { + OutputJobInfo outputJobInfo = OutputJobInfo.create("default", tableName, null); + Transaction txn = rm.beginWriteTransaction(tableName, Arrays.asList(familyName)); + 
outputJobInfo.getProperties().setProperty(HBaseConstants.PROPERTY_WRITE_TXN_KEY, + HCatUtil.serialize(txn)); + job.set(HCatConstants.HCAT_KEY_OUTPUT_INFO, + HCatUtil.serialize(outputJobInfo)); + } finally { + rm.close(); + } + + job.setMapOutputKeyClass(ImmutableBytesWritable.class); + job.setMapOutputValueClass(HCatRecord.class); + + job.setOutputKeyClass(ImmutableBytesWritable.class); + job.setOutputValueClass(HCatRecord.class); + + job.setNumReduceTasks(0); + + RunningJob runJob = JobClient.runJob(job); + runJob.waitForCompletion(); + assertTrue(runJob.isSuccessful()); + + //verify + HTable table = new HTable(conf, tableName); + Scan scan = new Scan(); + scan.addFamily(familyNameBytes); + ResultScanner scanner = table.getScanner(scan); + int index = 0; + for (Result result : scanner) { + String vals[] = data[index].toString().split(","); + for (int i = 1; i < vals.length; i++) { + String pair[] = vals[i].split(":"); + assertTrue(result.containsColumn(familyNameBytes, Bytes.toBytes(pair[0]))); + assertEquals(pair[1], Bytes.toString(result.getValue(familyNameBytes, Bytes.toBytes(pair[0])))); + } + index++; + } + //test if load count is the same + assertEquals(data.length, index); + //test if scratch directory was erased + assertFalse(FileSystem.get(job).exists(interPath)); + } + + @Test + public void importSequenceFileTest() throws IOException, ClassNotFoundException, InterruptedException { + String testName = "importSequenceFileTest"; + Path methodTestDir = new Path(getTestDir(), testName); + LOG.info("starting: " + testName); + + String tableName = newTableName(testName).toLowerCase(); + String familyName = "my_family"; + byte[] familyNameBytes = Bytes.toBytes(familyName); + + //include hbase config in conf file + Configuration conf = new Configuration(allConf); + + //create table + createTable(tableName, new String[]{familyName}); + + String data[] = {"1,english:one,spanish:uno", + "2,english:two,spanish:dos", + "3,english:three,spanish:tres"}; + + + // 
input/output settings + Path inputPath = new Path(methodTestDir, "mr_input"); + getFileSystem().mkdirs(inputPath); + FSDataOutputStream os = getFileSystem().create(new Path(inputPath, "inputFile.txt")); + for (String line : data) + os.write(Bytes.toBytes(line + "\n")); + os.close(); + Path interPath = new Path(methodTestDir, "inter"); + Path scratchPath = new Path(methodTestDir, "scratch"); + + + //create job + Job job = new Job(conf, testName); + job.setWorkingDirectory(new Path(methodTestDir, "mr_work")); + job.setJarByClass(this.getClass()); + job.setMapperClass(MapWrite.class); + + job.setInputFormatClass(TextInputFormat.class); + TextInputFormat.setInputPaths(job, inputPath); + + job.setOutputFormatClass(SequenceFileOutputFormat.class); + SequenceFileOutputFormat.setOutputPath(job, interPath); + + job.setMapOutputKeyClass(ImmutableBytesWritable.class); + job.setMapOutputValueClass(Put.class); + + job.setOutputKeyClass(ImmutableBytesWritable.class); + job.setOutputValueClass(Put.class); + + job.setNumReduceTasks(0); + assertTrue(job.waitForCompletion(true)); + + job = new Job(new Configuration(allConf), testName + "_importer"); + assertTrue(ImportSequenceFile.runJob(job, tableName, interPath, scratchPath)); + + //verify + HTable table = new HTable(conf, tableName); + Scan scan = new Scan(); + scan.addFamily(familyNameBytes); + ResultScanner scanner = table.getScanner(scan); + int index = 0; + for (Result result : scanner) { + String vals[] = data[index].toString().split(","); + for (int i = 1; i < vals.length; i++) { + String pair[] = vals[i].split(":"); + assertTrue(result.containsColumn(familyNameBytes, Bytes.toBytes(pair[0]))); + assertEquals(pair[1], Bytes.toString(result.getValue(familyNameBytes, Bytes.toBytes(pair[0])))); + } + index++; + } + //test if load count is the same + assertEquals(data.length, index); + //test if scratch directory was erased + assertFalse(FileSystem.get(job.getConfiguration()).exists(scratchPath)); + } + + @Test + public void 
bulkModeHCatOutputFormatTest() throws Exception { + String testName = "bulkModeHCatOutputFormatTest"; + Path methodTestDir = new Path(getTestDir(), testName); + LOG.info("starting: " + testName); + + String databaseName = testName.toLowerCase(); + String dbDir = new Path(methodTestDir, "DB_" + testName).toString(); + String tableName = newTableName(testName).toLowerCase(); + String familyName = "my_family"; + byte[] familyNameBytes = Bytes.toBytes(familyName); + + + //include hbase config in conf file + Configuration conf = new Configuration(allConf); + conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, HCatUtil.serialize(allConf.getAllProperties())); + + + String dbquery = "CREATE DATABASE IF NOT EXISTS " + databaseName + " LOCATION '" + dbDir + "'"; + String tableQuery = "CREATE TABLE " + databaseName + "." + tableName + + "(key int, english string, spanish string) STORED BY " + + "'org.apache.hive.hcatalog.hbase.HBaseHCatStorageHandler'" + + "TBLPROPERTIES ('" + HBaseConstants.PROPERTY_BULK_OUTPUT_MODE_KEY + "'='true'," + + "'hbase.columns.mapping'=':key," + familyName + ":english," + familyName + ":spanish')"; + + assertEquals(0, hcatDriver.run(dbquery).getResponseCode()); + assertEquals(0, hcatDriver.run(tableQuery).getResponseCode()); + + String data[] = {"1,english:ONE,spanish:UNO", + "2,english:TWO,spanish:DOS", + "3,english:THREE,spanish:TRES"}; + + // input/output settings + Path inputPath = new Path(methodTestDir, "mr_input"); + getFileSystem().mkdirs(inputPath); + //create multiple files so we can test with multiple mappers + for (int i = 0; i < data.length; i++) { + FSDataOutputStream os = getFileSystem().create(new Path(inputPath, "inputFile" + i + ".txt")); + os.write(Bytes.toBytes(data[i] + "\n")); + os.close(); + } + + //create job + Job job = new Job(conf, testName); + job.setWorkingDirectory(new Path(methodTestDir, "mr_work")); + job.setJarByClass(this.getClass()); + job.setMapperClass(MapHCatWrite.class); + + 
job.setInputFormatClass(TextInputFormat.class); + TextInputFormat.setInputPaths(job, inputPath); + + + job.setOutputFormatClass(HCatOutputFormat.class); + OutputJobInfo outputJobInfo = OutputJobInfo.create(databaseName, tableName, null); + HCatOutputFormat.setOutput(job, outputJobInfo); + + job.setMapOutputKeyClass(BytesWritable.class); + job.setMapOutputValueClass(HCatRecord.class); + + job.setOutputKeyClass(BytesWritable.class); + job.setOutputValueClass(HCatRecord.class); + + job.setNumReduceTasks(0); + + assertTrue(job.waitForCompletion(true)); + RevisionManager rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(conf); + try { + TableSnapshot snapshot = rm.createSnapshot(databaseName + "." + tableName); + for (String el : snapshot.getColumnFamilies()) { + assertEquals(1, snapshot.getRevision(el)); + } + } finally { + rm.close(); + } + + //verify + HTable table = new HTable(conf, databaseName + "." + tableName); + Scan scan = new Scan(); + scan.addFamily(familyNameBytes); + ResultScanner scanner = table.getScanner(scan); + int index = 0; + for (Result result : scanner) { + String vals[] = data[index].toString().split(","); + for (int i = 1; i < vals.length; i++) { + String pair[] = vals[i].split(":"); + assertTrue(result.containsColumn(familyNameBytes, Bytes.toBytes(pair[0]))); + assertEquals(pair[1], Bytes.toString(result.getValue(familyNameBytes, Bytes.toBytes(pair[0])))); + assertEquals(1l, result.getColumn(familyNameBytes, Bytes.toBytes(pair[0])).get(0).getTimestamp()); + } + index++; + } + //test if load count is the same + assertEquals(data.length, index); + } + + @Test + public void bulkModeHCatOutputFormatTestWithDefaultDB() throws Exception { + String testName = "bulkModeHCatOutputFormatTestWithDefaultDB"; + Path methodTestDir = new Path(getTestDir(), testName); + + String databaseName = "default"; + String dbDir = new Path(methodTestDir, "DB_" + testName).toString(); + String tableName = newTableName(testName).toLowerCase(); + String familyName = 
"my_family"; + byte[] familyNameBytes = Bytes.toBytes(familyName); + + + //include hbase config in conf file + Configuration conf = new Configuration(allConf); + conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, HCatUtil.serialize(allConf.getAllProperties())); + + + String dbquery = "CREATE DATABASE IF NOT EXISTS " + databaseName + " LOCATION '" + dbDir + "'"; + String tableQuery = "CREATE TABLE " + databaseName + "." + tableName + + "(key int, english string, spanish string) STORED BY " + + "'org.apache.hive.hcatalog.hbase.HBaseHCatStorageHandler'" + + "TBLPROPERTIES ('" + HBaseConstants.PROPERTY_BULK_OUTPUT_MODE_KEY + "'='true'," + + "'hbase.columns.mapping'=':key," + familyName + ":english," + familyName + ":spanish')"; + + assertEquals(0, hcatDriver.run(dbquery).getResponseCode()); + assertEquals(0, hcatDriver.run(tableQuery).getResponseCode()); + + String data[] = {"1,english:ONE,spanish:UNO", + "2,english:TWO,spanish:DOS", + "3,english:THREE,spanish:TRES"}; + + // input/output settings + Path inputPath = new Path(methodTestDir, "mr_input"); + getFileSystem().mkdirs(inputPath); + FSDataOutputStream os = getFileSystem().create(new Path(inputPath, "inputFile.txt")); + for (String line : data) + os.write(Bytes.toBytes(line + "\n")); + os.close(); + + //create job + Job job = new Job(conf, testName); + job.setWorkingDirectory(new Path(methodTestDir, "mr_work")); + job.setJarByClass(this.getClass()); + job.setMapperClass(MapHCatWrite.class); + + job.setInputFormatClass(TextInputFormat.class); + TextInputFormat.setInputPaths(job, inputPath); + + + job.setOutputFormatClass(HCatOutputFormat.class); + OutputJobInfo outputJobInfo = OutputJobInfo.create(databaseName, tableName, null); + HCatOutputFormat.setOutput(job, outputJobInfo); + + job.setMapOutputKeyClass(BytesWritable.class); + job.setMapOutputValueClass(HCatRecord.class); + + job.setOutputKeyClass(BytesWritable.class); + job.setOutputValueClass(HCatRecord.class); + + job.setNumReduceTasks(0); + + 
assertTrue(job.waitForCompletion(true)); + + //verify + HTable table = new HTable(conf, tableName); + Scan scan = new Scan(); + scan.addFamily(familyNameBytes); + ResultScanner scanner = table.getScanner(scan); + int index = 0; + for (Result result : scanner) { + String vals[] = data[index].toString().split(","); + for (int i = 1; i < vals.length; i++) { + String pair[] = vals[i].split(":"); + assertTrue(result.containsColumn(familyNameBytes, Bytes.toBytes(pair[0]))); + assertEquals(pair[1], Bytes.toString(result.getValue(familyNameBytes, Bytes.toBytes(pair[0])))); + } + index++; + } + //test if load count is the same + assertEquals(data.length, index); + } + + @Test + public void bulkModeAbortTest() throws Exception { + String testName = "bulkModeAbortTest"; + Path methodTestDir = new Path(getTestDir(), testName); + String databaseName = testName.toLowerCase(); + String dbDir = new Path(methodTestDir, "DB_" + testName).toString(); + String tableName = newTableName(testName).toLowerCase(); + String familyName = "my_family"; + + // include hbase config in conf file + Configuration conf = new Configuration(allConf); + conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, HCatUtil.serialize(allConf.getAllProperties())); + + String dbquery = "CREATE DATABASE IF NOT EXISTS " + databaseName + " LOCATION '" + dbDir + + "'"; + String tableQuery = "CREATE TABLE " + databaseName + "." 
+ tableName + + "(key int, english string, spanish string) STORED BY " + + "'org.apache.hive.hcatalog.hbase.HBaseHCatStorageHandler'" + + "TBLPROPERTIES ('" + HBaseConstants.PROPERTY_BULK_OUTPUT_MODE_KEY + "'='true'," + + "'hbase.columns.mapping'=':key," + familyName + ":english," + familyName + + ":spanish')"; + + assertEquals(0, hcatDriver.run(dbquery).getResponseCode()); + assertEquals(0, hcatDriver.run(tableQuery).getResponseCode()); + + String data[] = {"1,english:ONE,spanish:UNO", + "2,english:TWO,spanish:DOS", + "3,english:THREE,spanish:TRES"}; + + Path inputPath = new Path(methodTestDir, "mr_input"); + getFileSystem().mkdirs(inputPath); + // create multiple files so we can test with multiple mappers + for (int i = 0; i < data.length; i++) { + FSDataOutputStream os = getFileSystem().create( + new Path(inputPath, "inputFile" + i + ".txt")); + os.write(Bytes.toBytes(data[i] + "\n")); + os.close(); + } + + Path workingDir = new Path(methodTestDir, "mr_abort"); + OutputJobInfo outputJobInfo = OutputJobInfo.create(databaseName, + tableName, null); + Job job = configureJob(testName, + conf, workingDir, MapWriteAbortTransaction.class, + outputJobInfo, inputPath); + assertFalse(job.waitForCompletion(true)); + + // verify that revision manager has it as aborted transaction + RevisionManager rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(conf); + try { + TableSnapshot snapshot = rm.createSnapshot(databaseName + "." + tableName); + for (String family : snapshot.getColumnFamilies()) { + assertEquals(1, snapshot.getRevision(family)); + List abortedWriteTransactions = rm.getAbortedWriteTransactions( + databaseName + "." + tableName, family); + assertEquals(1, abortedWriteTransactions.size()); + assertEquals(1, abortedWriteTransactions.get(0).getRevision()); + } + } finally { + rm.close(); + } + + //verify that hbase does not have any of the records. + //Since records are only written during commitJob, + //hbase should not have any records. 
+ HTable table = new HTable(conf, databaseName + "." + tableName); + Scan scan = new Scan(); + scan.addFamily(Bytes.toBytes(familyName)); + ResultScanner scanner = table.getScanner(scan); + assertFalse(scanner.iterator().hasNext()); + + // verify that the storage handler input format returns empty results. + Path outputDir = new Path(getTestDir(), + "mapred/testHBaseTableBulkIgnoreAbortedTransactions"); + FileSystem fs = getFileSystem(); + if (fs.exists(outputDir)) { + fs.delete(outputDir, true); + } + job = new Job(conf, "hbase-bulk-aborted-transaction"); + job.setJarByClass(this.getClass()); + job.setMapperClass(MapReadAbortedTransaction.class); + job.setInputFormatClass(HCatInputFormat.class); + HCatInputFormat.setInput(job, databaseName, tableName); + job.setOutputFormatClass(TextOutputFormat.class); + TextOutputFormat.setOutputPath(job, outputDir); + job.setMapOutputKeyClass(BytesWritable.class); + job.setMapOutputValueClass(Text.class); + job.setOutputKeyClass(BytesWritable.class); + job.setOutputValueClass(Text.class); + job.setNumReduceTasks(0); + assertTrue(job.waitForCompletion(true)); + } + + private Job configureJob(String jobName, Configuration conf, + Path workingDir, Class mapperClass, + OutputJobInfo outputJobInfo, Path inputPath) throws IOException { + Job job = new Job(conf, jobName); + job.setWorkingDirectory(workingDir); + job.setJarByClass(this.getClass()); + job.setMapperClass(mapperClass); + + job.setInputFormatClass(TextInputFormat.class); + TextInputFormat.setInputPaths(job, inputPath); + job.setOutputFormatClass(HCatOutputFormat.class); + HCatOutputFormat.setOutput(job, outputJobInfo); + + job.setMapOutputKeyClass(BytesWritable.class); + job.setMapOutputValueClass(HCatRecord.class); + job.setOutputKeyClass(BytesWritable.class); + job.setOutputValueClass(HCatRecord.class); + + job.setNumReduceTasks(0); + return job; + } + +} + diff --git hcatalog/storage-handlers/hbase/src/test/org/apache/hive/hcatalog/hbase/TestHBaseDirectOutputFormat.java 
hcatalog/storage-handlers/hbase/src/test/org/apache/hive/hcatalog/hbase/TestHBaseDirectOutputFormat.java new file mode 100644 index 0000000..aadf472 --- /dev/null +++ hcatalog/storage-handlers/hbase/src/test/org/apache/hive/hcatalog/hbase/TestHBaseDirectOutputFormat.java @@ -0,0 +1,501 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hive.hcatalog.hbase; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HBaseConfiguration; +import org.apache.hadoop.hbase.client.HTable; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.client.Result; +import org.apache.hadoop.hbase.client.ResultScanner; +import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.apache.hadoop.hbase.mapred.TableOutputFormat; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hive.cli.CliSessionState; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.WritableComparable; +import org.apache.hadoop.mapred.JobClient; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.OutputCollector; +import org.apache.hadoop.mapred.Reporter; +import org.apache.hadoop.mapred.RunningJob; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; +import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; +import org.apache.hive.hcatalog.cli.HCatDriver; +import org.apache.hive.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzer; +import org.apache.hive.hcatalog.common.HCatConstants; +import org.apache.hive.hcatalog.common.HCatUtil; +import org.apache.hive.hcatalog.data.DefaultHCatRecord; +import org.apache.hive.hcatalog.data.HCatRecord; +import org.apache.hive.hcatalog.data.schema.HCatSchema; +import org.apache.hive.hcatalog.hbase.snapshot.FamilyRevision; +import org.apache.hive.hcatalog.hbase.snapshot.RevisionManager; +import 
org.apache.hive.hcatalog.hbase.snapshot.RevisionManagerConfiguration;
+import org.apache.hive.hcatalog.hbase.snapshot.TableSnapshot;
+import org.apache.hive.hcatalog.hbase.snapshot.Transaction;
+import org.apache.hive.hcatalog.mapreduce.HCatInputFormat;
+import org.apache.hive.hcatalog.mapreduce.HCatOutputFormat;
+import org.apache.hive.hcatalog.mapreduce.OutputJobInfo;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotSame;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * Test HBaseDirectOutputFormat and HBaseHCatStorageHandler using a MiniCluster
+ */
+public class TestHBaseDirectOutputFormat extends SkeletonHBaseTest {
+
+    private final HiveConf allConf;
+    private final HCatDriver hcatDriver;
+
+    /**
+     * Builds the Hive configuration shared by all tests: registers the HCatalog
+     * semantic analyzer hook, points the file system and warehouse at the test
+     * locations, copies in the HBase and job configuration entries, merges the
+     * revision manager configuration, then starts a CLI session and the driver.
+     */
+    public TestHBaseDirectOutputFormat() {
+        allConf = getHiveConf();
+        allConf.set(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname,
+            HCatSemanticAnalyzer.class.getName());
+        allConf.set(HiveConf.ConfVars.HADOOPFS.varname, getFileSystem().getUri().toString());
+        allConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, new Path(getTestDir(), "warehouse").toString());
+
+        //Add hbase properties
+        for (Map.Entry<String, String> el : getHbaseConf())
+            allConf.set(el.getKey(), el.getValue());
+        for (Map.Entry<String, String> el : getJobConf())
+            allConf.set(el.getKey(), el.getValue());
+        HBaseConfiguration.merge(
+            allConf,
+            RevisionManagerConfiguration.create());
+        SessionState.start(new CliSessionState(allConf));
+        hcatDriver = new HCatDriver();
+    }
+
+    /**
+     * Writes three rows through HBaseDirectOutputFormat with a map-only job built on
+     * the old (mapred) API and a manually started write transaction, then scans the
+     * HBase table and checks every qualifier/value pair of the input.
+     */
+    @Test
+    public void directOutputFormatTest() throws IOException, ClassNotFoundException, InterruptedException {
+        String testName = "directOutputFormatTest";
+        Path methodTestDir = new Path(getTestDir(), testName);
+
+        String tableName = newTableName(testName).toLowerCase();
+        String familyName = "my_family";
+        byte[] familyNameBytes = Bytes.toBytes(familyName);
+
+        //include hbase config in conf file
+        Configuration conf = new Configuration(allConf);
+        conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, HCatUtil.serialize(allConf.getAllProperties()));
+
+        //create table
+        createTable(tableName, new String[]{familyName});
+
+        String data[] = {"1,english:ONE,spanish:UNO",
+            "2,english:ONE,spanish:DOS",
+            "3,english:ONE,spanish:TRES"};
+
+
+        // input/output settings
+        Path inputPath = new Path(methodTestDir, "mr_input");
+        getFileSystem().mkdirs(inputPath);
+        FSDataOutputStream os = getFileSystem().create(new Path(inputPath, "inputFile.txt"));
+        for (String line : data)
+            os.write(Bytes.toBytes(line + "\n"));
+        os.close();
+
+        //create job
+        JobConf job = new JobConf(conf);
+        job.setJobName(testName);
+        job.setWorkingDirectory(new Path(methodTestDir, "mr_work"));
+        job.setJarByClass(this.getClass());
+        job.setMapperClass(MapWrite.class);
+
+        job.setInputFormat(org.apache.hadoop.mapred.TextInputFormat.class);
+        org.apache.hadoop.mapred.TextInputFormat.setInputPaths(job, inputPath);
+
+        job.setOutputFormat(HBaseDirectOutputFormat.class);
+        job.set(TableOutputFormat.OUTPUT_TABLE, tableName);
+        job.set(HBaseConstants.PROPERTY_OUTPUT_TABLE_NAME_KEY, tableName);
+
+        //manually create transaction
+        RevisionManager rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(conf);
+        try {
+            OutputJobInfo outputJobInfo = OutputJobInfo.create("default", tableName, null);
+            Transaction txn = rm.beginWriteTransaction(tableName, Arrays.asList(familyName));
+            outputJobInfo.getProperties().setProperty(HBaseConstants.PROPERTY_WRITE_TXN_KEY,
+                HCatUtil.serialize(txn));
+            job.set(HCatConstants.HCAT_KEY_OUTPUT_INFO,
+                HCatUtil.serialize(outputJobInfo));
+        } finally {
+            rm.close();
+        }
+
+        job.setMapOutputKeyClass(BytesWritable.class);
+        job.setMapOutputValueClass(HCatRecord.class);
+        job.setOutputKeyClass(BytesWritable.class);
+        job.setOutputValueClass(HCatRecord.class);
+        job.setNumReduceTasks(0);
+
+        RunningJob runJob = JobClient.runJob(job);
+        runJob.waitForCompletion();
+        assertTrue(runJob.isSuccessful());
+
+        //verify
+        HTable table = new HTable(conf, tableName);
+        try {
+            Scan scan = new Scan();
+            scan.addFamily(familyNameBytes);
+            ResultScanner scanner = table.getScanner(scan);
+            try {
+                int index = 0;
+                for (Result result : scanner) {
+                    String vals[] = data[index].split(",");
+                    for (int i = 1; i < vals.length; i++) {
+                        String pair[] = vals[i].split(":");
+                        assertTrue(result.containsColumn(familyNameBytes, Bytes.toBytes(pair[0])));
+                        assertEquals(pair[1], Bytes.toString(result.getValue(familyNameBytes, Bytes.toBytes(pair[0]))));
+                    }
+                    index++;
+                }
+                assertEquals(data.length, index);
+            } finally {
+                // Release the scanner even when an assertion above fails.
+                scanner.close();
+            }
+        } finally {
+            table.close();
+        }
+    }
+
+    /**
+     * Creates a database and an HBase-backed table through HCatalog, writes three rows
+     * with a map-only job using HCatOutputFormat, then checks that each column family is
+     * at revision 1 and that every cell carries the expected value and timestamp 1.
+     */
+    @Test
+    public void directHCatOutputFormatTest() throws Exception {
+        String testName = "directHCatOutputFormatTest";
+        Path methodTestDir = new Path(getTestDir(), testName);
+
+        String databaseName = testName;
+        String dbDir = new Path(methodTestDir, "DB_" + testName).toString();
+        String tableName = newTableName(testName);
+        String familyName = "my_family";
+        byte[] familyNameBytes = Bytes.toBytes(familyName);
+        //Table name will be lower case unless specified by hbase.table.name property
+        String hbaseTableName = (databaseName + "." + tableName).toLowerCase();
+
+        //include hbase config in conf file
+        Configuration conf = new Configuration(allConf);
+        conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, HCatUtil.serialize(allConf.getAllProperties()));
+
+
+        String dbquery = "CREATE DATABASE IF NOT EXISTS " + databaseName + " LOCATION '" + dbDir + "'";
+        String tableQuery = "CREATE TABLE " + databaseName + "." + tableName +
+            "(key int, english string, spanish string) STORED BY " +
+            "'org.apache.hive.hcatalog.hbase.HBaseHCatStorageHandler'" +
+            "TBLPROPERTIES (" +
+            "'hbase.columns.mapping'=':key," + familyName + ":english," + familyName + ":spanish')";
+
+        assertEquals(0, hcatDriver.run(dbquery).getResponseCode());
+        assertEquals(0, hcatDriver.run(tableQuery).getResponseCode());
+
+        String data[] = {"1,english:ONE,spanish:UNO",
+            "2,english:ONE,spanish:DOS",
+            "3,english:ONE,spanish:TRES"};
+
+        // input/output settings
+        Path inputPath = new Path(methodTestDir, "mr_input");
+        getFileSystem().mkdirs(inputPath);
+        //create multiple files so we can test with multiple mappers
+        for (int i = 0; i < data.length; i++) {
+            FSDataOutputStream os = getFileSystem().create(new Path(inputPath, "inputFile" + i + ".txt"));
+            os.write(Bytes.toBytes(data[i] + "\n"));
+            os.close();
+        }
+
+        //create job
+        Path workingDir = new Path(methodTestDir, "mr_work");
+        OutputJobInfo outputJobInfo = OutputJobInfo.create(databaseName,
+            tableName, null);
+        Job job = configureJob(testName, conf, workingDir, MapHCatWrite.class,
+            outputJobInfo, inputPath);
+        assertTrue(job.waitForCompletion(true));
+
+        RevisionManager rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(conf);
+        try {
+            TableSnapshot snapshot = rm.createSnapshot(hbaseTableName);
+            for (String el : snapshot.getColumnFamilies()) {
+                assertEquals(1, snapshot.getRevision(el));
+            }
+        } finally {
+            rm.close();
+        }
+
+        //verify
+        HTable table = new HTable(conf, hbaseTableName);
+        try {
+            Scan scan = new Scan();
+            scan.addFamily(familyNameBytes);
+            ResultScanner scanner = table.getScanner(scan);
+            try {
+                int index = 0;
+                for (Result result : scanner) {
+                    String vals[] = data[index].split(",");
+                    for (int i = 1; i < vals.length; i++) {
+                        String pair[] = vals[i].split(":");
+                        assertTrue(result.containsColumn(familyNameBytes, Bytes.toBytes(pair[0])));
+                        assertEquals(pair[1], Bytes.toString(result.getValue(familyNameBytes, Bytes.toBytes(pair[0]))));
+                        assertEquals(1L, result.getColumn(familyNameBytes, Bytes.toBytes(pair[0])).get(0).getTimestamp());
+                    }
+                    index++;
+                }
+                assertEquals(data.length, index);
+            } finally {
+                // Release the scanner even when an assertion above fails.
+                scanner.close();
+            }
+        } finally {
+            table.close();
+        }
+    }
+
+    /**
+     * Runs a write job whose mapper fails on its third record and checks that the job
+     * fails, that the revision manager reports exactly one aborted write transaction
+     * (revision 1) per column family, that HBase holds only the rows written before the
+     * failure, and that a follow-up read job through HCatInputFormat completes.
+     */
+    @Test
+    public void directModeAbortTest() throws Exception {
+        String testName = "directModeAbortTest";
+        Path methodTestDir = new Path(getTestDir(), testName);
+        String databaseName = testName;
+        String dbDir = new Path(methodTestDir, "DB_" + testName).toString();
+        String tableName = newTableName(testName);
+        String familyName = "my_family";
+        byte[] familyNameBytes = Bytes.toBytes(familyName);
+        //Table name as specified by hbase.table.name property
+        String hbaseTableName = tableName;
+
+        // include hbase config in conf file
+        Configuration conf = new Configuration(allConf);
+        conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, HCatUtil.serialize(allConf.getAllProperties()));
+
+        String dbquery = "CREATE DATABASE IF NOT EXISTS " + databaseName + " LOCATION '" + dbDir
+            + "'";
+        String tableQuery = "CREATE TABLE " + databaseName + "." + tableName +
+            "(key int, english string, spanish string) STORED BY " +
+            "'org.apache.hive.hcatalog.hbase.HBaseHCatStorageHandler'" +
+            "TBLPROPERTIES (" +
+            "'hbase.columns.mapping'=':key," + familyName + ":english," + familyName +
+            ":spanish','hbase.table.name'='" + hbaseTableName + "')";
+
+        assertEquals(0, hcatDriver.run(dbquery).getResponseCode());
+        assertEquals(0, hcatDriver.run(tableQuery).getResponseCode());
+
+        String data[] = {"1,english:ONE,spanish:UNO",
+            "2,english:TWO,spanish:DOS",
+            "3,english:THREE,spanish:TRES"};
+
+        Path inputPath = new Path(methodTestDir, "mr_input");
+        getFileSystem().mkdirs(inputPath);
+        // create multiple files so we can test with multiple mappers
+        for (int i = 0; i < data.length; i++) {
+            FSDataOutputStream os = getFileSystem().create(
+                new Path(inputPath, "inputFile" + i + ".txt"));
+            os.write(Bytes.toBytes(data[i] + "\n"));
+            os.close();
+        }
+
+        Path workingDir = new Path(methodTestDir, "mr_abort");
+        OutputJobInfo outputJobInfo = OutputJobInfo.create(databaseName,
+            tableName, null);
+        Job job = configureJob(testName, conf, workingDir, MapWriteAbortTransaction.class,
+            outputJobInfo, inputPath);
+        assertFalse(job.waitForCompletion(true));
+
+        // verify that revision manager has it as aborted transaction
+        RevisionManager rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(conf);
+        try {
+            TableSnapshot snapshot = rm.createSnapshot(hbaseTableName);
+            for (String family : snapshot.getColumnFamilies()) {
+                assertEquals(1, snapshot.getRevision(family));
+                // NOTE(review): element type reconstructed as FamilyRevision from the
+                // getRevision() call below - confirm against the RevisionManager interface.
+                List<FamilyRevision> abortedWriteTransactions = rm.getAbortedWriteTransactions(
+                    hbaseTableName, family);
+                assertEquals(1, abortedWriteTransactions.size());
+                assertEquals(1, abortedWriteTransactions.get(0).getRevision());
+            }
+        } finally {
+            rm.close();
+        }
+
+        // verify that hbase has the records of the successful maps.
+        HTable table = new HTable(conf, hbaseTableName);
+        try {
+            Scan scan = new Scan();
+            scan.addFamily(familyNameBytes);
+            ResultScanner scanner = table.getScanner(scan);
+            try {
+                int count = 0;
+                for (Result result : scanner) {
+                    String key = Bytes.toString(result.getRow());
+                    // Compare by value: assertNotSame only checks reference identity, which
+                    // cannot detect a row key equal to the failed key.
+                    // NOTE(review): failedKey is set by the mapper, so this presumably relies
+                    // on the mapper running in the test's JVM - confirm.
+                    assertFalse(key.equals(MapWriteAbortTransaction.failedKey));
+                    int index = Integer.parseInt(key) - 1;
+                    String vals[] = data[index].split(",");
+                    for (int i = 1; i < vals.length; i++) {
+                        String pair[] = vals[i].split(":");
+                        assertTrue(result.containsColumn(familyNameBytes, Bytes.toBytes(pair[0])));
+                        assertEquals(pair[1],
+                            Bytes.toString(result.getValue(familyNameBytes, Bytes.toBytes(pair[0]))));
+                        assertEquals(1L, result.getColumn(familyNameBytes, Bytes.toBytes(pair[0])).get(0)
+                            .getTimestamp());
+                    }
+                    count++;
+                }
+                assertEquals(data.length - 1, count);
+            } finally {
+                // Release the scanner even when an assertion above fails.
+                scanner.close();
+            }
+        } finally {
+            table.close();
+        }
+
+        // verify that the inputformat returns empty results.
+        Path outputDir = new Path(getTestDir(),
+            "mapred/testHBaseTableIgnoreAbortedTransactions");
+        FileSystem fs = getFileSystem();
+        if (fs.exists(outputDir)) {
+            fs.delete(outputDir, true);
+        }
+        job = new Job(conf, "hbase-aborted-transaction");
+        job.setJarByClass(this.getClass());
+        job.setMapperClass(MapReadAbortedTransaction.class);
+        job.setInputFormatClass(HCatInputFormat.class);
+        HCatInputFormat.setInput(job, databaseName, tableName);
+        job.setOutputFormatClass(TextOutputFormat.class);
+        TextOutputFormat.setOutputPath(job, outputDir);
+        job.setMapOutputKeyClass(BytesWritable.class);
+        job.setMapOutputValueClass(Text.class);
+        job.setOutputKeyClass(BytesWritable.class);
+        job.setOutputValueClass(Text.class);
+        job.setNumReduceTasks(0);
+        assertTrue(job.waitForCompletion(true));
+    }
+
+    /**
+     * Creates a map-only job that reads text files from inputPath and writes through
+     * HCatOutputFormat. Also asserts that calling HCatOutputFormat.setOutput a second
+     * time with the OutputJobInfo deserialized from the first job yields the same
+     * write-transaction property value.
+     *
+     * @param jobName       name given to the job
+     * @param conf          configuration the job is created from
+     * @param workingDir    working directory set on the job
+     * @param mapperClass   mapper to run
+     * @param outputJobInfo output description passed to HCatOutputFormat.setOutput
+     * @param inputPath     directory holding the text input
+     * @return the configured, not yet submitted job
+     */
+    private Job configureJob(String jobName, Configuration conf,
+                             Path workingDir, Class<? extends Mapper> mapperClass,
+                             OutputJobInfo outputJobInfo, Path inputPath) throws IOException {
+        Job job = new Job(conf, jobName);
+        job.setWorkingDirectory(workingDir);
+        job.setJarByClass(this.getClass());
+        job.setMapperClass(mapperClass);
+
+        job.setInputFormatClass(TextInputFormat.class);
+        TextInputFormat.setInputPaths(job, inputPath);
+        job.setOutputFormatClass(HCatOutputFormat.class);
+        HCatOutputFormat.setOutput(job, outputJobInfo);
+        String txnString = job.getConfiguration().get(HBaseConstants.PROPERTY_WRITE_TXN_KEY);
+        //Test passing in same OutputJobInfo multiple times and verify 1 transaction is created
+        String jobString = job.getConfiguration().get(HCatConstants.HCAT_KEY_OUTPUT_INFO);
+        outputJobInfo = (OutputJobInfo) HCatUtil.deserialize(jobString);
+        Job job2 = new Job(conf);
+        HCatOutputFormat.setOutput(job2, outputJobInfo);
+        assertEquals(txnString, job2.getConfiguration().get(HBaseConstants.PROPERTY_WRITE_TXN_KEY));
+        job.setMapOutputKeyClass(BytesWritable.class);
+        job.setMapOutputValueClass(HCatRecord.class);
+        job.setOutputKeyClass(BytesWritable.class);
+        job.setOutputValueClass(HCatRecord.class);
+
+        job.setNumReduceTasks(0);
+        return job;
+    }
+
+    /**
+     * Turns a "key,qualifier:value,..." text line into an HCatRecord, using the output
+     * schema stored in the job configuration, and writes it with a null key.
+     */
+    public static class MapHCatWrite extends Mapper<LongWritable, Text, BytesWritable, HCatRecord> {
+
+        @Override
+        public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
+            OutputJobInfo jobInfo = (OutputJobInfo) HCatUtil.deserialize(context.getConfiguration().get(HCatConstants.HCAT_KEY_OUTPUT_INFO));
+            HCatRecord record = new DefaultHCatRecord(3);
+            HCatSchema schema = jobInfo.getOutputSchema();
+            String vals[] = value.toString().split(",");
+            record.setInteger("key", schema, Integer.parseInt(vals[0]));
+            for (int i = 1; i < vals.length; i++) {
+                String pair[] = vals[i].split(":");
+                record.set(pair[0], schema, pair[1]);
+            }
+            context.write(null, record);
+        }
+    }
+
+    /**
+     * Old-API mapper that turns a "key,qualifier:value,..." text line into an HBase Put
+     * on family "my_family" and collects it with a null key.
+     */
+    public static class MapWrite implements org.apache.hadoop.mapred.Mapper<LongWritable, Text, BytesWritable, Put> {
+
+        @Override
+        public void configure(JobConf job) {
+        }
+
+        @Override
+        public void close() throws IOException {
+        }
+
+        @Override
+        public void map(LongWritable key, Text value,
+                        OutputCollector<BytesWritable, Put> output, Reporter reporter)
+            throws IOException {
+            String vals[] = value.toString().split(",");
+            Put put = new Put(Bytes.toBytes(vals[0]));
+            for (int i = 1; i < vals.length; i++) {
+                String pair[] = vals[i].split(":");
+                put.add(Bytes.toBytes("my_family"),
+                    Bytes.toBytes(pair[0]),
+                    Bytes.toBytes(pair[1]));
+            }
+            output.collect(null, put);
+        }
+    }
+
+    /**
+     * Same record construction as MapHCatWrite, but throws an IOException instead of
+     * writing once two records have already been written (tracked in a static counter),
+     * and remembers the key of the record it failed on in failedKey.
+     */
+    static class MapWriteAbortTransaction extends Mapper<LongWritable, Text, BytesWritable, HCatRecord> {
+        public static String failedKey;
+        private static int count = 0;
+
+        @Override
+        public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
+            OutputJobInfo jobInfo = (OutputJobInfo) HCatUtil.deserialize(context.getConfiguration().get(HCatConstants.HCAT_KEY_OUTPUT_INFO));
+            HCatRecord record = new DefaultHCatRecord(3);
+            HCatSchema schema = jobInfo.getOutputSchema();
+            String vals[] = value.toString().split(",");
+            record.setInteger("key", schema, Integer.parseInt(vals[0]));
+            // The counter is static, so the check-and-increment is guarded by the class lock.
+            synchronized (MapWriteAbortTransaction.class) {
+                if (count == 2) {
+                    failedKey = vals[0];
+                    throw new IOException("Failing map to test abort");
+                }
+                for (int i = 1; i < vals.length; i++) {
+                    String pair[] = vals[i].split(":");
+                    record.set(pair[0], schema, pair[1]);
+                }
+                context.write(null, record);
+                count++;
+            }
+
+        }
+
+    }
+
+    /**
+     * Mapper that expects no input: if any record is delivered it prints every record
+     * and then fails the task with an IOException.
+     */
+    // NOTE(review): the third type argument was reconstructed as WritableComparable<?> -
+    // confirm against the upstream source.
+    static class MapReadAbortedTransaction
+        extends
+        Mapper<ImmutableBytesWritable, HCatRecord, WritableComparable<?>, Text> {
+
+        @Override
+        public void run(Context context) throws IOException,
+            InterruptedException {
+            setup(context);
+            if (context.nextKeyValue()) {
+                map(context.getCurrentKey(), context.getCurrentValue(), context);
+                while (context.nextKeyValue()) {
+                    map(context.getCurrentKey(), context.getCurrentValue(),
+                        context);
+                }
+                throw new IOException("There should have been no records");
+            }
+            cleanup(context);
+        }
+
+        @Override
+        public void map(ImmutableBytesWritable key, HCatRecord value,
+                        Context context) throws IOException, InterruptedException {
+            System.out.println("HCat record value" + value.toString());
+        }
+    }
+}
diff --git
hcatalog/storage-handlers/hbase/src/test/org/apache/hive/hcatalog/hbase/TestHBaseHCatStorageHandler.java hcatalog/storage-handlers/hbase/src/test/org/apache/hive/hcatalog/hbase/TestHBaseHCatStorageHandler.java
new file mode 100644
index 0000000..f1fd54a
--- /dev/null
+++ hcatalog/storage-handlers/hbase/src/test/org/apache/hive/hcatalog/hbase/TestHBaseHCatStorageHandler.java
@@ -0,0 +1,290 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hive.hcatalog.hbase;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.net.URI;
+import java.util.Map;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.client.HBaseAdmin;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hive.cli.CliSessionState;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
+import org.apache.hadoop.hive.metastore.Warehouse;
+import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
+import org.apache.hadoop.hive.ql.session.SessionState;
+import org.apache.hive.hcatalog.cli.HCatDriver;
+import org.apache.hive.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzer;
+import org.apache.hive.hcatalog.hbase.snapshot.RevisionManager;
+import org.apache.hive.hcatalog.hbase.snapshot.RevisionManagerConfiguration;
+import org.apache.zookeeper.KeeperException.NoNodeException;
+import org.junit.Test;
+
+/**
+ * Tests creating and dropping HCatalog tables stored by HBaseHCatStorageHandler.
+ */
+public class TestHBaseHCatStorageHandler extends SkeletonHBaseTest {
+
+    private static HiveConf hcatConf;
+    private static HCatDriver hcatDriver;
+    private static Warehouse wh;
+
+    /**
+     * Fills the static configuration and driver used by the tests: sets the HCatalog
+     * semantic analyzer hook, the file system and warehouse location, copies every
+     * "hbase." property, merges the revision manager configuration and starts a
+     * CLI session.
+     */
+    public void Initialize() throws Exception {
+
+        hcatConf = getHiveConf();
+        hcatConf.set(ConfVars.SEMANTIC_ANALYZER_HOOK.varname,
+            HCatSemanticAnalyzer.class.getName());
+        URI fsuri = getFileSystem().getUri();
+        Path whPath = new Path(fsuri.getScheme(), fsuri.getAuthority(),
+            getTestDir());
+        hcatConf.set(HiveConf.ConfVars.HADOOPFS.varname, fsuri.toString());
+        hcatConf.set(ConfVars.METASTOREWAREHOUSE.varname, whPath.toString());
+
+        //Add hbase properties
+        for (Map.Entry<String, String> el : getHbaseConf()) {
+            if (el.getKey().startsWith("hbase.")) {
+                hcatConf.set(el.getKey(), el.getValue());
+            }
+        }
+        HBaseConfiguration.merge(
+            hcatConf,
+            RevisionManagerConfiguration.create());
+
+        SessionState.start(new CliSessionState(hcatConf));
+        hcatDriver = new HCatDriver();
+
+    }
+
+    /**
+     * Creates a table through HCatalog, verifies the HBase table exists and the
+     * revision manager can be queried for it, then drops it and verifies the
+     * HBase table is gone.
+     */
+    @Test
+    public void testTableCreateDrop() throws Exception {
+        Initialize();
+
+        hcatDriver.run("drop table test_table");
+        CommandProcessorResponse response = hcatDriver
+            .run("create table test_table(key int, value string) STORED BY " +
+                "'org.apache.hive.hcatalog.hbase.HBaseHCatStorageHandler'"
+                + "TBLPROPERTIES ('hbase.columns.mapping'=':key,cf1:val')");
+
+        assertEquals(0, response.getResponseCode());
+
+        HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf());
+        boolean doesTableExist = hAdmin.tableExists("test_table");
+
+        assertTrue(doesTableExist);
+
+        RevisionManager rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(hcatConf);
+        try {
+            rm.open();
+            //Should be able to successfully query revision manager
+            rm.getAbortedWriteTransactions("test_table", "cf1");
+
+            hcatDriver.run("drop table test_table");
+            doesTableExist = hAdmin.tableExists("test_table");
+            assertFalse(doesTableExist);
+
+            // NOTE(review): passes silently when no exception is thrown; confirm whether
+            // a missing exception after the drop should fail the test.
+            try {
+                rm.getAbortedWriteTransactions("test_table", "cf1");
+            } catch (Exception e) {
+                assertTrue(e.getCause() instanceof NoNodeException);
+            }
+        } finally {
+            rm.close();
+        }
+
+    }
+
+    /**
+     * Same as testTableCreateDrop but with a mixed-case Hive table name; the HBase
+     * table is expected under the lower-cased name.
+     */
+    @Test
+    public void testTableCreateDropDifferentCase() throws Exception {
+        Initialize();
+
+        hcatDriver.run("drop table test_Table");
+        CommandProcessorResponse response = hcatDriver
+            .run("create table test_Table(key int, value string) STORED BY " +
+                "'org.apache.hive.hcatalog.hbase.HBaseHCatStorageHandler'"
+                + "TBLPROPERTIES ('hbase.columns.mapping'=':key,cf1:val')");
+
+        assertEquals(0, response.getResponseCode());
+
+        //HBase table gets created with lower case unless specified as a table property.
+        HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf());
+        boolean doesTableExist = hAdmin.tableExists("test_table");
+
+        assertTrue(doesTableExist);
+
+        RevisionManager rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(hcatConf);
+        try {
+            rm.open();
+            //Should be able to successfully query revision manager
+            rm.getAbortedWriteTransactions("test_table", "cf1");
+
+            hcatDriver.run("drop table test_table");
+            doesTableExist = hAdmin.tableExists("test_table");
+            assertFalse(doesTableExist);
+
+            // NOTE(review): passes silently when no exception is thrown; confirm whether
+            // a missing exception after the drop should fail the test.
+            try {
+                rm.getAbortedWriteTransactions("test_table", "cf1");
+            } catch (Exception e) {
+                assertTrue(e.getCause() instanceof NoNodeException);
+            }
+        } finally {
+            rm.close();
+        }
+
+    }
+
+    /**
+     * Creates a table whose 'hbase.table.name' property is mixed case and verifies the
+     * HBase table is created, queried and dropped under exactly that name.
+     */
+    @Test
+    public void testTableCreateDropCaseSensitive() throws Exception {
+        Initialize();
+
+        hcatDriver.run("drop table test_Table");
+        CommandProcessorResponse response = hcatDriver
+            .run("create table test_Table(key int, value string) STORED BY " +
+                "'org.apache.hive.hcatalog.hbase.HBaseHCatStorageHandler'"
+                + "TBLPROPERTIES ('hbase.columns.mapping'=':key,cf1:val'," +
+                " 'hbase.table.name'='CaseSensitiveTable')");
+
+        assertEquals(0, response.getResponseCode());
+
+        HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf());
+        boolean doesTableExist = hAdmin.tableExists("CaseSensitiveTable");
+
+        assertTrue(doesTableExist);
+
+        RevisionManager rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(hcatConf);
+        try {
+            rm.open();
+            //Should be able to successfully query revision manager
+            rm.getAbortedWriteTransactions("CaseSensitiveTable", "cf1");
+
+            hcatDriver.run("drop table test_table");
+            doesTableExist = hAdmin.tableExists("CaseSensitiveTable");
+            assertFalse(doesTableExist);
+
+            // NOTE(review): passes silently when no exception is thrown; confirm whether
+            // a missing exception after the drop should fail the test.
+            try {
+                rm.getAbortedWriteTransactions("CaseSensitiveTable", "cf1");
+            } catch (Exception e) {
+                assertTrue(e.getCause() instanceof NoNodeException);
+            }
+        } finally {
+            rm.close();
+        }
+
+    }
+
+    /**
+     * Deletes the backing HBase table directly and verifies that dropping the
+     * HCatalog table afterwards still succeeds.
+     */
+    @Test
+    public void testTableDropNonExistent() throws Exception {
+        Initialize();
+
+        hcatDriver.run("drop table mytable");
+        CommandProcessorResponse response = hcatDriver
+            .run("create table mytable(key int, value string) STORED BY " +
+                "'org.apache.hive.hcatalog.hbase.HBaseHCatStorageHandler'"
+                + "TBLPROPERTIES ('hbase.columns.mapping'=':key,cf1:val')");
+
+        assertEquals(0, response.getResponseCode());
+
+        HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf());
+        boolean doesTableExist = hAdmin.tableExists("mytable");
+        assertTrue(doesTableExist);
+
+        //Now delete the table from hbase
+        if (hAdmin.isTableEnabled("mytable")) {
+            hAdmin.disableTable("mytable");
+        }
+        hAdmin.deleteTable("mytable");
+        doesTableExist = hAdmin.tableExists("mytable");
+        assertFalse(doesTableExist);
+
+        CommandProcessorResponse responseTwo = hcatDriver.run("drop table mytable");
+        assertEquals(0, responseTwo.getResponseCode());
+
+    }
+
+    /**
+     * Creates an HBase table directly and verifies an external HCatalog table can be
+     * mapped onto it.
+     */
+    @Test
+    public void testTableCreateExternal() throws Exception {
+        // hcatDriver is only assigned by Initialize(), so it must run here too or this
+        // test fails with a null driver whenever it executes before the other tests.
+        Initialize();
+
+        String tableName = "testTable";
+        HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf());
+
+        HTableDescriptor tableDesc = new HTableDescriptor(tableName);
+        tableDesc.addFamily(new HColumnDescriptor(Bytes.toBytes("key")));
+        tableDesc.addFamily(new HColumnDescriptor(Bytes.toBytes("familyone")));
+        tableDesc.addFamily(new HColumnDescriptor(Bytes.toBytes("familytwo")));
+
+        hAdmin.createTable(tableDesc);
+        boolean doesTableExist = hAdmin.tableExists(tableName);
+        assertTrue(doesTableExist);
+
+        hcatDriver.run("drop table mytabletwo");
+        CommandProcessorResponse response = hcatDriver
+            .run("create external table mytabletwo(key int, valueone string, valuetwo string) STORED BY " +
+                "'org.apache.hive.hcatalog.hbase.HBaseHCatStorageHandler'"
+                + "TBLPROPERTIES ('hbase.columns.mapping'=':key,familyone:val,familytwo:val'," +
+                "'hbase.table.name'='testTable')");
+
+        assertEquals(0, response.getResponseCode());
+
+    }
+
+
+}
diff --git hcatalog/storage-handlers/hbase/src/test/org/apache/hive/hcatalog/hbase/TestHBaseInputFormat.java hcatalog/storage-handlers/hbase/src/test/org/apache/hive/hcatalog/hbase/TestHBaseInputFormat.java
new file
mode 100644 index 0000000..92430d1 --- /dev/null +++ hcatalog/storage-handlers/hbase/src/test/org/apache/hive/hcatalog/hbase/TestHBaseInputFormat.java @@ -0,0 +1,609 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.hbase; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import java.io.IOException; +import java.net.URI; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HBaseConfiguration; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.client.HBaseAdmin; +import org.apache.hadoop.hbase.client.HTable; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.client.Result; +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import org.apache.hadoop.hbase.mapreduce.TableInputFormat; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hive.cli.CliSessionState; +import 
org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import org.apache.hadoop.hive.metastore.MetaStoreUtils; +import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; +import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.WritableComparable; +import org.apache.hadoop.mapred.JobClient; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.OutputCollector; +import org.apache.hadoop.mapred.Reporter; +import org.apache.hadoop.mapred.RunningJob; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; +import org.apache.hive.hcatalog.cli.HCatDriver; +import org.apache.hive.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzer; +import org.apache.hive.hcatalog.common.HCatConstants; +import org.apache.hive.hcatalog.common.HCatException; +import org.apache.hive.hcatalog.common.HCatUtil; +import org.apache.hive.hcatalog.data.HCatRecord; +import org.apache.hive.hcatalog.data.schema.HCatFieldSchema; +import org.apache.hive.hcatalog.data.schema.HCatSchema; +import org.apache.hive.hcatalog.hbase.snapshot.RevisionManager; +import org.apache.hive.hcatalog.hbase.snapshot.RevisionManagerConfiguration; +import org.apache.hive.hcatalog.hbase.snapshot.Transaction; +import org.apache.hive.hcatalog.mapreduce.HCatInputFormat; +import org.apache.hive.hcatalog.mapreduce.InputJobInfo; +import org.apache.hive.hcatalog.mapreduce.PartInfo; +import org.junit.Test; + +public class TestHBaseInputFormat extends SkeletonHBaseTest { + + private static HiveConf hcatConf; + private static HCatDriver hcatDriver; + private final byte[] FAMILY = Bytes.toBytes("testFamily"); + private final byte[] QUALIFIER1 = Bytes.toBytes("testQualifier1"); + private final byte[] QUALIFIER2 = Bytes.toBytes("testQualifier2"); + + public 
TestHBaseInputFormat() throws Exception { + hcatConf = getHiveConf(); + hcatConf.set(ConfVars.SEMANTIC_ANALYZER_HOOK.varname, + HCatSemanticAnalyzer.class.getName()); + URI fsuri = getFileSystem().getUri(); + Path whPath = new Path(fsuri.getScheme(), fsuri.getAuthority(), + getTestDir()); + hcatConf.set(HiveConf.ConfVars.HADOOPFS.varname, fsuri.toString()); + hcatConf.set(ConfVars.METASTOREWAREHOUSE.varname, whPath.toString()); + + //Add hbase properties + + for (Map.Entry el : getHbaseConf()) { + if (el.getKey().startsWith("hbase.")) { + hcatConf.set(el.getKey(), el.getValue()); + } + } + HBaseConfiguration.merge(hcatConf, + RevisionManagerConfiguration.create()); + + + SessionState.start(new CliSessionState(hcatConf)); + hcatDriver = new HCatDriver(); + + } + + private List generatePuts(int num, String tableName) throws IOException { + + List columnFamilies = Arrays.asList("testFamily"); + RevisionManager rm = null; + List myPuts; + try { + rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(hcatConf); + rm.open(); + myPuts = new ArrayList(); + for (int i = 1; i <= num; i++) { + Put put = new Put(Bytes.toBytes("testRow")); + put.add(FAMILY, QUALIFIER1, i, Bytes.toBytes("textValue-" + i)); + put.add(FAMILY, QUALIFIER2, i, Bytes.toBytes("textValue-" + i)); + myPuts.add(put); + Transaction tsx = rm.beginWriteTransaction(tableName, + columnFamilies); + rm.commitWriteTransaction(tsx); + } + } finally { + if (rm != null) + rm.close(); + } + + return myPuts; + } + + private void populateHBaseTable(String tName, int revisions) throws IOException { + List myPuts = generatePuts(revisions, tName); + HTable table = new HTable(getHbaseConf(), Bytes.toBytes(tName)); + table.put(myPuts); + } + + private long populateHBaseTableQualifier1(String tName, int value, Boolean commit) + throws IOException { + List columnFamilies = Arrays.asList("testFamily"); + RevisionManager rm = null; + List myPuts = new ArrayList(); + long revision; + try { + rm = 
HBaseRevisionManagerUtil.getOpenedRevisionManager(hcatConf); + rm.open(); + Transaction tsx = rm.beginWriteTransaction(tName, columnFamilies); + + Put put = new Put(Bytes.toBytes("testRow")); + revision = tsx.getRevisionNumber(); + put.add(FAMILY, QUALIFIER1, revision, + Bytes.toBytes("textValue-" + value)); + myPuts.add(put); + + // If commit is null it is left as a running transaction + if (commit != null) { + if (commit) { + rm.commitWriteTransaction(tsx); + } else { + rm.abortWriteTransaction(tsx); + } + } + } finally { + if (rm != null) + rm.close(); + } + HTable table = new HTable(getHbaseConf(), Bytes.toBytes(tName)); + table.put(myPuts); + return revision; + } + + @Test + public void TestHBaseTableReadMR() throws Exception { + String tableName = newTableName("MyTable"); + String databaseName = newTableName("MyDatabase"); + //Table name will be lower case unless specified by hbase.table.name property + String hbaseTableName = (databaseName + "." + tableName).toLowerCase(); + String db_dir = new Path(getTestDir(), "hbasedb").toString(); + + String dbquery = "CREATE DATABASE IF NOT EXISTS " + databaseName + " LOCATION '" + + db_dir + "'"; + String tableQuery = "CREATE TABLE " + databaseName + "." 
+ tableName + + "(key string, testqualifier1 string, testqualifier2 string) STORED BY " + + "'org.apache.hive.hcatalog.hbase.HBaseHCatStorageHandler'" + + "TBLPROPERTIES ('hbase.columns.mapping'=':key,testFamily:testQualifier1,testFamily:testQualifier2')"; + + CommandProcessorResponse responseOne = hcatDriver.run(dbquery); + assertEquals(0, responseOne.getResponseCode()); + CommandProcessorResponse responseTwo = hcatDriver.run(tableQuery); + assertEquals(0, responseTwo.getResponseCode()); + + HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf()); + boolean doesTableExist = hAdmin.tableExists(hbaseTableName); + assertTrue(doesTableExist); + + populateHBaseTable(hbaseTableName, 5); + Configuration conf = new Configuration(hcatConf); + conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, + HCatUtil.serialize(getHiveConf().getAllProperties())); + + // output settings + Path outputDir = new Path(getTestDir(), "mapred/testHbaseTableMRRead"); + FileSystem fs = getFileSystem(); + if (fs.exists(outputDir)) { + fs.delete(outputDir, true); + } + // create job + Job job = new Job(conf, "hbase-mr-read-test"); + job.setJarByClass(this.getClass()); + job.setMapperClass(MapReadHTable.class); + MapReadHTable.resetCounters(); + + job.setInputFormatClass(HCatInputFormat.class); + HCatInputFormat.setInput(job.getConfiguration(), databaseName, tableName); + job.setOutputFormatClass(TextOutputFormat.class); + TextOutputFormat.setOutputPath(job, outputDir); + job.setMapOutputKeyClass(BytesWritable.class); + job.setMapOutputValueClass(Text.class); + job.setOutputKeyClass(BytesWritable.class); + job.setOutputValueClass(Text.class); + job.setNumReduceTasks(0); + assertTrue(job.waitForCompletion(true)); + // Note: These asserts only works in case of LocalJobRunner as they run in same jvm. + // If using MiniMRCluster, the tests will have to be modified. 
+ assertFalse(MapReadHTable.error); + assertEquals(MapReadHTable.count, 1); + + String dropTableQuery = "DROP TABLE " + hbaseTableName; + CommandProcessorResponse responseThree = hcatDriver.run(dropTableQuery); + assertEquals(0, responseThree.getResponseCode()); + + boolean isHbaseTableThere = hAdmin.tableExists(hbaseTableName); + assertFalse(isHbaseTableThere); + + String dropDB = "DROP DATABASE " + databaseName; + CommandProcessorResponse responseFour = hcatDriver.run(dropDB); + assertEquals(0, responseFour.getResponseCode()); + } + + @Test + public void TestHBaseTableProjectionReadMR() throws Exception { + + String tableName = newTableName("MyTable"); + //Table name as specified by hbase.table.name property + String hbaseTableName = "MyDB_" + tableName; + String tableQuery = "CREATE TABLE " + tableName + + "(key string, testqualifier1 string, testqualifier2 string) STORED BY " + + "'org.apache.hive.hcatalog.hbase.HBaseHCatStorageHandler'" + + "TBLPROPERTIES ('hbase.columns.mapping'=" + + "':key,testFamily:testQualifier1,testFamily:testQualifier2'," + + "'hbase.table.name'='" + hbaseTableName + "')"; + + CommandProcessorResponse responseTwo = hcatDriver.run(tableQuery); + assertEquals(0, responseTwo.getResponseCode()); + + HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf()); + boolean doesTableExist = hAdmin.tableExists(hbaseTableName); + assertTrue(doesTableExist); + + populateHBaseTable(hbaseTableName, 5); + + Configuration conf = new Configuration(hcatConf); + conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, + HCatUtil.serialize(getHiveConf().getAllProperties())); + + // output settings + Path outputDir = new Path(getTestDir(), "mapred/testHBaseTableProjectionReadMR"); + FileSystem fs = getFileSystem(); + if (fs.exists(outputDir)) { + fs.delete(outputDir, true); + } + // create job + Job job = new Job(conf, "hbase-column-projection"); + job.setJarByClass(this.getClass()); + job.setMapperClass(MapReadProjHTable.class); + job.setInputFormatClass(HCatInputFormat.class); 
+ HCatInputFormat.setOutputSchema(job, getProjectionSchema()); + HCatInputFormat.setInput(job, MetaStoreUtils.DEFAULT_DATABASE_NAME, tableName); + job.setOutputFormatClass(TextOutputFormat.class); + TextOutputFormat.setOutputPath(job, outputDir); + job.setMapOutputKeyClass(BytesWritable.class); + job.setMapOutputValueClass(Text.class); + job.setOutputKeyClass(BytesWritable.class); + job.setOutputValueClass(Text.class); + job.setNumReduceTasks(0); + assertTrue(job.waitForCompletion(true)); + assertFalse(MapReadProjHTable.error); + assertEquals(MapReadProjHTable.count, 1); + + String dropTableQuery = "DROP TABLE " + tableName; + CommandProcessorResponse responseThree = hcatDriver.run(dropTableQuery); + assertEquals(0, responseThree.getResponseCode()); + + boolean isHbaseTableThere = hAdmin.tableExists(hbaseTableName); + assertFalse(isHbaseTableThere); + } + + @Test + public void TestHBaseInputFormatProjectionReadMR() throws Exception { + + String tableName = newTableName("mytable"); + String tableQuery = "CREATE TABLE " + tableName + + "(key string, testqualifier1 string, testqualifier2 string) STORED BY " + + "'org.apache.hive.hcatalog.hbase.HBaseHCatStorageHandler'" + + "TBLPROPERTIES ('hbase.columns.mapping'=':key," + + "testFamily:testQualifier1,testFamily:testQualifier2')"; + + CommandProcessorResponse responseTwo = hcatDriver.run(tableQuery); + assertEquals(0, responseTwo.getResponseCode()); + + HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf()); + boolean doesTableExist = hAdmin.tableExists(tableName); + assertTrue(doesTableExist); + + populateHBaseTable(tableName, 5); + + Configuration conf = new Configuration(hcatConf); + conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, + HCatUtil.serialize(getHiveConf().getAllProperties())); + + // output settings + Path outputDir = new Path(getTestDir(), "mapred/testHBaseTableProjectionReadMR"); + FileSystem fs = getFileSystem(); + if (fs.exists(outputDir)) { + fs.delete(outputDir, true); + } + // create job + JobConf job = new 
JobConf(conf); + job.setJobName("hbase-scan-column"); + job.setJarByClass(this.getClass()); + job.setMapperClass(MapReadProjectionHTable.class); + job.setInputFormat(HBaseInputFormat.class); + + //Configure projection schema + job.set(HCatConstants.HCAT_KEY_OUTPUT_SCHEMA, HCatUtil.serialize(getProjectionSchema())); + Job newJob = new Job(job); + HCatInputFormat.setInput(newJob, MetaStoreUtils.DEFAULT_DATABASE_NAME, tableName); + String inputJobString = newJob.getConfiguration().get(HCatConstants.HCAT_KEY_JOB_INFO); + InputJobInfo info = (InputJobInfo) HCatUtil.deserialize(inputJobString); + job.set(HCatConstants.HCAT_KEY_JOB_INFO, inputJobString); + for (PartInfo partinfo : info.getPartitions()) { + for (Entry entry : partinfo.getJobProperties().entrySet()) + job.set(entry.getKey(), entry.getValue()); + } + assertEquals("testFamily:testQualifier1", job.get(TableInputFormat.SCAN_COLUMNS)); + + job.setOutputFormat(org.apache.hadoop.mapred.TextOutputFormat.class); + org.apache.hadoop.mapred.TextOutputFormat.setOutputPath(job, outputDir); + job.setMapOutputKeyClass(BytesWritable.class); + job.setMapOutputValueClass(Text.class); + job.setOutputKeyClass(BytesWritable.class); + job.setOutputValueClass(Text.class); + job.setNumReduceTasks(0); + + RunningJob runJob = JobClient.runJob(job); + runJob.waitForCompletion(); + assertTrue(runJob.isSuccessful()); + assertFalse(MapReadProjHTable.error); + assertEquals(MapReadProjHTable.count, 1); + + String dropTableQuery = "DROP TABLE " + tableName; + CommandProcessorResponse responseThree = hcatDriver.run(dropTableQuery); + assertEquals(0, responseThree.getResponseCode()); + + boolean isHbaseTableThere = hAdmin.tableExists(tableName); + assertFalse(isHbaseTableThere); + } + + @Test + public void TestHBaseTableIgnoreAbortedTransactions() throws Exception { + String tableName = newTableName("mytable"); + String tableQuery = "CREATE TABLE " + tableName + + "(key string, testqualifier1 string, testqualifier2 string) STORED BY " + + 
"'org.apache.hive.hcatalog.hbase.HBaseHCatStorageHandler'" + + "TBLPROPERTIES ('hbase.columns.mapping'=':key," + + "testFamily:testQualifier1,testFamily:testQualifier2')"; + + CommandProcessorResponse responseTwo = hcatDriver.run(tableQuery); + assertEquals(0, responseTwo.getResponseCode()); + + HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf()); + boolean doesTableExist = hAdmin.tableExists(tableName); + assertTrue(doesTableExist); + + populateHBaseTable(tableName, 5); + populateHBaseTableQualifier1(tableName, 6, false); + populateHBaseTableQualifier1(tableName, 7, false); + + Configuration conf = new Configuration(hcatConf); + conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, + HCatUtil.serialize(getHiveConf().getAllProperties())); + + Path outputDir = new Path(getTestDir(), "mapred/testHBaseTableIgnoreAbortedTransactions"); + FileSystem fs = getFileSystem(); + if (fs.exists(outputDir)) { + fs.delete(outputDir, true); + } + Job job = new Job(conf, "hbase-aborted-transaction"); + job.setJarByClass(this.getClass()); + job.setMapperClass(MapReadHTable.class); + MapReadHTable.resetCounters(); + job.setInputFormatClass(HCatInputFormat.class); + HCatInputFormat.setInput(job, MetaStoreUtils.DEFAULT_DATABASE_NAME, tableName); + job.setOutputFormatClass(TextOutputFormat.class); + TextOutputFormat.setOutputPath(job, outputDir); + job.setMapOutputKeyClass(BytesWritable.class); + job.setMapOutputValueClass(Text.class); + job.setOutputKeyClass(BytesWritable.class); + job.setOutputValueClass(Text.class); + job.setNumReduceTasks(0); + assertTrue(job.waitForCompletion(true)); + // Verify that the records do not contain aborted transaction + // revisions 6 and 7 for testFamily:testQualifier1 and + // fetches revision 5 for both testQualifier1 and testQualifier2 + assertFalse(MapReadHTable.error); + assertEquals(1, MapReadHTable.count); + + String dropTableQuery = "DROP TABLE " + tableName; + CommandProcessorResponse responseThree = hcatDriver.run(dropTableQuery); + assertEquals(0, 
responseThree.getResponseCode()); + + boolean isHbaseTableThere = hAdmin.tableExists(tableName); + assertFalse(isHbaseTableThere); + } + + @Test + public void TestHBaseTableIgnoreAbortedAndRunningTransactions() throws Exception { + String tableName = newTableName("mytable"); + String tableQuery = "CREATE TABLE " + tableName + + "(key string, testqualifier1 string, testqualifier2 string) STORED BY " + + "'org.apache.hive.hcatalog.hbase.HBaseHCatStorageHandler'" + + "TBLPROPERTIES ('hbase.columns.mapping'=':key," + + "testFamily:testQualifier1,testFamily:testQualifier2')"; + + CommandProcessorResponse responseTwo = hcatDriver.run(tableQuery); + assertEquals(0, responseTwo.getResponseCode()); + + HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf()); + boolean doesTableExist = hAdmin.tableExists(tableName); + assertTrue(doesTableExist); + + populateHBaseTable(tableName, 2); + populateHBaseTableQualifier1(tableName, 3, Boolean.TRUE); //Committed transaction + populateHBaseTableQualifier1(tableName, 4, null); //Running transaction + populateHBaseTableQualifier1(tableName, 5, Boolean.FALSE); //Aborted transaction + populateHBaseTableQualifier1(tableName, 6, Boolean.TRUE); //Committed transaction + populateHBaseTableQualifier1(tableName, 7, null); //Running Transaction + populateHBaseTableQualifier1(tableName, 8, Boolean.FALSE); //Aborted Transaction + + Configuration conf = new Configuration(hcatConf); + conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, + HCatUtil.serialize(getHiveConf().getAllProperties())); + + Path outputDir = new Path(getTestDir(), "mapred/testHBaseTableIgnoreAbortedTransactions"); + FileSystem fs = getFileSystem(); + if (fs.exists(outputDir)) { + fs.delete(outputDir, true); + } + Job job = new Job(conf, "hbase-running-aborted-transaction"); + job.setJarByClass(this.getClass()); + job.setMapperClass(MapReadHTableRunningAbort.class); + job.setInputFormatClass(HCatInputFormat.class); + HCatInputFormat.setInput(job, MetaStoreUtils.DEFAULT_DATABASE_NAME, 
tableName); + job.setOutputFormatClass(TextOutputFormat.class); + TextOutputFormat.setOutputPath(job, outputDir); + job.setMapOutputKeyClass(BytesWritable.class); + job.setMapOutputValueClass(Text.class); + job.setOutputKeyClass(BytesWritable.class); + job.setOutputValueClass(Text.class); + job.setNumReduceTasks(0); + assertTrue(job.waitForCompletion(true)); + // Verify that the records do not contain running and aborted transaction + // and it fetches revision 2 for testQualifier1 and testQualifier2 + assertFalse(MapReadHTableRunningAbort.error); + assertEquals(1, MapReadHTableRunningAbort.count); + + String dropTableQuery = "DROP TABLE " + tableName; + CommandProcessorResponse responseThree = hcatDriver.run(dropTableQuery); + assertEquals(0, responseThree.getResponseCode()); + + boolean isHbaseTableThere = hAdmin.tableExists(tableName); + assertFalse(isHbaseTableThere); + } + + + static class MapReadHTable + extends + Mapper, Text> { + + static boolean error = false; + static int count = 0; + + @Override + public void map(ImmutableBytesWritable key, HCatRecord value, + Context context) throws IOException, InterruptedException { + System.out.println("HCat record value" + value.toString()); + boolean correctValues = (value.size() == 3) + && (value.get(0).toString()).equalsIgnoreCase("testRow") + && (value.get(1).toString()).equalsIgnoreCase("textValue-5") + && (value.get(2).toString()).equalsIgnoreCase("textValue-5"); + + if (correctValues == false) { + error = true; + } + count++; + } + + public static void resetCounters() { + error = false; + count = 0; + } + } + + static class MapReadProjHTable + extends + Mapper, Text> { + + static boolean error = false; + static int count = 0; + + @Override + public void map(ImmutableBytesWritable key, HCatRecord value, + Context context) throws IOException, InterruptedException { + System.out.println("HCat record value" + value.toString()); + boolean correctValues = (value.size() == 2) + && 
(value.get(0).toString()).equalsIgnoreCase("testRow") + && (value.get(1).toString()).equalsIgnoreCase("textValue-5"); + + if (correctValues == false) { + error = true; + } + count++; + } + } + + static class MapReadProjectionHTable + implements org.apache.hadoop.mapred.Mapper, Text> { + + static boolean error = false; + static int count = 0; + + @Override + public void configure(JobConf job) { + } + + @Override + public void close() throws IOException { + } + + @Override + public void map(ImmutableBytesWritable key, Result result, + OutputCollector, Text> output, Reporter reporter) + throws IOException { + System.out.println("Result " + result.toString()); + List list = result.list(); + boolean correctValues = (list.size() == 1) + && (Bytes.toString(list.get(0).getRow())).equalsIgnoreCase("testRow") + && (Bytes.toString(list.get(0).getValue())).equalsIgnoreCase("textValue-5") + && (Bytes.toString(list.get(0).getFamily())).equalsIgnoreCase("testFamily") + && (Bytes.toString(list.get(0).getQualifier())).equalsIgnoreCase("testQualifier1"); + + if (correctValues == false) { + error = true; + } + count++; + } + } + + static class MapReadHTableRunningAbort + extends + Mapper, Text> { + + static boolean error = false; + static int count = 0; + + @Override + public void map(ImmutableBytesWritable key, HCatRecord value, + Context context) throws IOException, InterruptedException { + System.out.println("HCat record value" + value.toString()); + boolean correctValues = (value.size() == 3) + && (value.get(0).toString()).equalsIgnoreCase("testRow") + && (value.get(1).toString()).equalsIgnoreCase("textValue-3") + && (value.get(2).toString()).equalsIgnoreCase("textValue-2"); + + if (correctValues == false) { + error = true; + } + count++; + } + } + + private HCatSchema getProjectionSchema() throws HCatException { + + HCatSchema schema = new HCatSchema(new ArrayList()); + schema.append(new HCatFieldSchema("key", HCatFieldSchema.Type.STRING, + "")); + schema.append(new 
HCatFieldSchema("testqualifier1", + HCatFieldSchema.Type.STRING, "")); + return schema; + } + + +} diff --git hcatalog/storage-handlers/hbase/src/test/org/apache/hive/hcatalog/hbase/TestSnapshots.java hcatalog/storage-handlers/hbase/src/test/org/apache/hive/hcatalog/hbase/TestSnapshots.java new file mode 100644 index 0000000..cfe2e9d --- /dev/null +++ hcatalog/storage-handlers/hbase/src/test/org/apache/hive/hcatalog/hbase/TestSnapshots.java @@ -0,0 +1,141 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hive.hcatalog.hbase; + +import static org.junit.Assert.assertEquals; + +import java.net.URI; +import java.util.HashMap; +import java.util.Map; +import java.util.Properties; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.cli.CliSessionState; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; +import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hive.hcatalog.cli.HCatDriver; +import org.apache.hive.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzer; +import org.apache.hive.hcatalog.common.HCatConstants; +import org.apache.hive.hcatalog.common.HCatUtil; +import org.apache.hive.hcatalog.hbase.snapshot.TableSnapshot; +import org.apache.hive.hcatalog.mapreduce.HCatInputFormat; +import org.apache.hive.hcatalog.mapreduce.InputJobInfo; +import org.junit.Test; + +public class TestSnapshots extends SkeletonHBaseTest { + private static HiveConf hcatConf; + private static HCatDriver hcatDriver; + + public void Initialize() throws Exception { + hcatConf = getHiveConf(); + hcatConf.set(ConfVars.SEMANTIC_ANALYZER_HOOK.varname, + HCatSemanticAnalyzer.class.getName()); + URI fsuri = getFileSystem().getUri(); + Path whPath = new Path(fsuri.getScheme(), fsuri.getAuthority(), + getTestDir()); + hcatConf.set(HiveConf.ConfVars.HADOOPFS.varname, fsuri.toString()); + hcatConf.set(ConfVars.METASTOREWAREHOUSE.varname, whPath.toString()); + + //Add hbase properties + + for (Map.Entry el : getHbaseConf()) { + if (el.getKey().startsWith("hbase.")) { + hcatConf.set(el.getKey(), el.getValue()); + } + } + + SessionState.start(new CliSessionState(hcatConf)); + hcatDriver = new HCatDriver(); + + } + + @Test + public void TestSnapshotConversion() throws Exception { + Initialize(); + String tableName = newTableName("mytableOne"); 
+ String databaseName = newTableName("mydatabase"); + String fullyQualTableName = databaseName + "." + tableName; + String db_dir = new Path(getTestDir(), "hbasedb").toString(); + String dbquery = "CREATE DATABASE IF NOT EXISTS " + databaseName + " LOCATION '" + + db_dir + "'"; + String tableQuery = "CREATE TABLE " + fullyQualTableName + + "(key string, value1 string, value2 string) STORED BY " + + "'org.apache.hive.hcatalog.hbase.HBaseHCatStorageHandler'" + + "TBLPROPERTIES ('hbase.columns.mapping'=':key,cf1:q1,cf2:q2')"; + + CommandProcessorResponse cmdResponse = hcatDriver.run(dbquery); + assertEquals(0, cmdResponse.getResponseCode()); + cmdResponse = hcatDriver.run(tableQuery); + assertEquals(0, cmdResponse.getResponseCode()); + + Configuration conf = new Configuration(hcatConf); + conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, + HCatUtil.serialize(getHiveConf().getAllProperties())); + Job job = new Job(conf); + Properties properties = new Properties(); + properties.setProperty(HBaseConstants.PROPERTY_TABLE_SNAPSHOT_KEY, "dummysnapshot"); + HCatInputFormat.setInput(job, databaseName, tableName).setProperties(properties); + String modifiedInputInfo = job.getConfiguration().get(HCatConstants.HCAT_KEY_JOB_INFO); + InputJobInfo inputInfo = (InputJobInfo) HCatUtil.deserialize(modifiedInputInfo); + + Map revMap = new HashMap(); + revMap.put("cf1", 3L); + revMap.put("cf2", 5L); + TableSnapshot hbaseSnapshot = new TableSnapshot(fullyQualTableName, revMap, -1); + HCatTableSnapshot hcatSnapshot = HBaseRevisionManagerUtil.convertSnapshot(hbaseSnapshot, inputInfo.getTableInfo()); + + assertEquals(hcatSnapshot.getRevision("value1"), 3); + assertEquals(hcatSnapshot.getRevision("value2"), 5); + + String dropTable = "DROP TABLE " + fullyQualTableName; + cmdResponse = hcatDriver.run(dropTable); + assertEquals(0, cmdResponse.getResponseCode()); + + tableName = newTableName("mytableTwo"); + fullyQualTableName = databaseName + "." 
+ tableName; + tableQuery = "CREATE TABLE " + fullyQualTableName + + "(key string, value1 string, value2 string) STORED BY " + + "'org.apache.hive.hcatalog.hbase.HBaseHCatStorageHandler'" + + "TBLPROPERTIES ('hbase.columns.mapping'=':key,cf1:q1,cf1:q2')"; + cmdResponse = hcatDriver.run(tableQuery); + assertEquals(0, cmdResponse.getResponseCode()); + revMap.clear(); + revMap.put("cf1", 3L); + hbaseSnapshot = new TableSnapshot(fullyQualTableName, revMap, -1); + HCatInputFormat.setInput(job, databaseName, tableName).setProperties(properties); + modifiedInputInfo = job.getConfiguration().get(HCatConstants.HCAT_KEY_JOB_INFO); + inputInfo = (InputJobInfo) HCatUtil.deserialize(modifiedInputInfo); + hcatSnapshot = HBaseRevisionManagerUtil.convertSnapshot(hbaseSnapshot, inputInfo.getTableInfo()); + assertEquals(hcatSnapshot.getRevision("value1"), 3); + assertEquals(hcatSnapshot.getRevision("value2"), 3); + + dropTable = "DROP TABLE " + fullyQualTableName; + cmdResponse = hcatDriver.run(dropTable); + assertEquals(0, cmdResponse.getResponseCode()); + + String dropDatabase = "DROP DATABASE IF EXISTS " + databaseName + "CASCADE"; + cmdResponse = hcatDriver.run(dropDatabase); + assertEquals(0, cmdResponse.getResponseCode()); + } +} diff --git hcatalog/storage-handlers/hbase/src/test/org/apache/hive/hcatalog/hbase/snapshot/IDGenClient.java hcatalog/storage-handlers/hbase/src/test/org/apache/hive/hcatalog/hbase/snapshot/IDGenClient.java new file mode 100644 index 0000000..42234f4 --- /dev/null +++ hcatalog/storage-handlers/hbase/src/test/org/apache/hive/hcatalog/hbase/snapshot/IDGenClient.java @@ -0,0 +1,72 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.hbase.snapshot; + +import java.util.HashMap; +import java.util.Map; +import java.util.Random; + +public class IDGenClient extends Thread { + + String connectionStr; + String base_dir; + ZKUtil zkutil; + Random sleepTime = new Random(); + int runtime; + HashMap idMap; + String tableName; + + IDGenClient(String connectionStr, String base_dir, int time, String tableName) { + super(); + this.connectionStr = connectionStr; + this.base_dir = base_dir; + this.zkutil = new ZKUtil(connectionStr, base_dir); + this.runtime = time; + idMap = new HashMap(); + this.tableName = tableName; + } + + /* + * @see java.lang.Runnable#run() + */ + @Override + public void run() { + long startTime = System.currentTimeMillis(); + int timeElapsed = 0; + while( timeElapsed <= runtime){ + try { + long id = zkutil.nextId(tableName); + idMap.put(System.currentTimeMillis(), id); + + int sTime = sleepTime.nextInt(2); + Thread.sleep(sTime * 100); + } catch (Exception e) { + e.printStackTrace(); + } + + timeElapsed = (int) Math.ceil((System.currentTimeMillis() - startTime)/(double)1000); + } + + } + + Map getIdMap(){ + return idMap; + } + +} diff --git hcatalog/storage-handlers/hbase/src/test/org/apache/hive/hcatalog/hbase/snapshot/TestIDGenerator.java hcatalog/storage-handlers/hbase/src/test/org/apache/hive/hcatalog/hbase/snapshot/TestIDGenerator.java new file mode 100644 index 
0000000..fcc4017 --- /dev/null +++ hcatalog/storage-handlers/hbase/src/test/org/apache/hive/hcatalog/hbase/snapshot/TestIDGenerator.java @@ -0,0 +1,99 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.hbase.snapshot; + +import static org.junit.Assert.assertTrue; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; + +import org.apache.hive.hcatalog.hbase.SkeletonHBaseTest; +import org.junit.Assert; +import org.junit.Test; + +public class TestIDGenerator extends SkeletonHBaseTest { + + @Test + public void testIDGeneration() throws Exception { + + int port = getHbaseConf().getInt("hbase.zookeeper.property.clientPort", 2181); + String servers = getHbaseConf().get("hbase.zookeeper.quorum"); + String[] splits = servers.split(","); + StringBuffer sb = new StringBuffer(); + for (String split : splits) { + sb.append(split); + sb.append(':'); + sb.append(port); + } + ZKUtil zkutil = new ZKUtil(sb.toString(), "/rm_base"); + + String tableName = "myTable"; + long initId = zkutil.nextId(tableName); + for (int i = 0; i < 10; i++) { + long id = zkutil.nextId(tableName); + Assert.assertEquals(initId + (i + 1), id); + } + } + + @Test + public void 
testMultipleClients() throws InterruptedException { + + int port = getHbaseConf().getInt("hbase.zookeeper.property.clientPort", 2181); + String servers = getHbaseConf().get("hbase.zookeeper.quorum"); + String[] splits = servers.split(","); + StringBuffer sb = new StringBuffer(); + for (String split : splits) { + sb.append(split); + sb.append(':'); + sb.append(port); + } + + ArrayList clients = new ArrayList(); + + for (int i = 0; i < 5; i++) { + IDGenClient idClient = new IDGenClient(sb.toString(), "/rm_base", 10, "testTable"); + clients.add(idClient); + } + + for (IDGenClient idClient : clients) { + idClient.run(); + } + + for (IDGenClient idClient : clients) { + idClient.join(); + } + + HashMap idMap = new HashMap(); + for (IDGenClient idClient : clients) { + idMap.putAll(idClient.getIdMap()); + } + + ArrayList keys = new ArrayList(idMap.keySet()); + Collections.sort(keys); + int startId = 1; + for (Long key : keys) { + Long id = idMap.get(key); + System.out.println("Key: " + key + " Value " + id); + assertTrue(id == startId); + startId++; + + } + } +} diff --git hcatalog/storage-handlers/hbase/src/test/org/apache/hive/hcatalog/hbase/snapshot/TestRevisionManager.java hcatalog/storage-handlers/hbase/src/test/org/apache/hive/hcatalog/hbase/snapshot/TestRevisionManager.java new file mode 100644 index 0000000..989b22f --- /dev/null +++ hcatalog/storage-handlers/hbase/src/test/org/apache/hive/hcatalog/hbase/snapshot/TestRevisionManager.java @@ -0,0 +1,260 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.hbase.snapshot; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.io.IOException; +import java.util.Arrays; +import java.util.List; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hive.hcatalog.hbase.SkeletonHBaseTest; +import org.apache.hive.hcatalog.hbase.snapshot.transaction.thrift.StoreFamilyRevision; +import org.apache.hive.hcatalog.hbase.snapshot.transaction.thrift.StoreFamilyRevisionList; +import org.apache.zookeeper.KeeperException; +import org.apache.zookeeper.ZooKeeper; +import org.apache.zookeeper.data.Stat; +import org.junit.Test; + +public class TestRevisionManager extends SkeletonHBaseTest { + + @Test + public void testBasicZNodeCreation() throws IOException, KeeperException, InterruptedException { + + int port = getHbaseConf().getInt("hbase.zookeeper.property.clientPort", 2181); + String servers = getHbaseConf().get("hbase.zookeeper.quorum"); + String[] splits = servers.split(","); + StringBuffer sb = new StringBuffer(); + for (String split : splits) { + sb.append(split); + sb.append(':'); + sb.append(port); + } + + ZKUtil zkutil = new ZKUtil(sb.toString(), "/rm_base"); + String tableName = newTableName("testTable"); + List columnFamilies = Arrays.asList("cf001", "cf002", "cf003"); + + zkutil.createRootZNodes(); + ZooKeeper zk = zkutil.getSession(); + Stat tempTwo = zk.exists("/rm_base" + PathUtil.DATA_DIR, false); + assertTrue(tempTwo != null); + Stat tempThree = zk.exists("/rm_base" + PathUtil.CLOCK_NODE, false); + 
assertTrue(tempThree != null); + + zkutil.setUpZnodesForTable(tableName, columnFamilies); + String transactionDataTablePath = "/rm_base" + PathUtil.DATA_DIR + "/" + tableName; + Stat result = zk.exists(transactionDataTablePath, false); + assertTrue(result != null); + + for (String colFamiliy : columnFamilies) { + String cfPath = transactionDataTablePath + "/" + colFamiliy; + Stat resultTwo = zk.exists(cfPath, false); + assertTrue(resultTwo != null); + } + + } + + @Test + public void testCommitTransaction() throws IOException { + + int port = getHbaseConf().getInt("hbase.zookeeper.property.clientPort", 2181); + String servers = getHbaseConf().get("hbase.zookeeper.quorum"); + String[] splits = servers.split(","); + StringBuffer sb = new StringBuffer(); + for (String split : splits) { + sb.append(split); + sb.append(':'); + sb.append(port); + } + + Configuration conf = RevisionManagerConfiguration.create(getHbaseConf()); + conf.set(RMConstants.ZOOKEEPER_DATADIR, "/rm_base"); + ZKBasedRevisionManager manager = new ZKBasedRevisionManager(); + manager.initialize(conf); + manager.open(); + ZKUtil zkutil = new ZKUtil(sb.toString(), "/rm_base"); + + String tableName = newTableName("testTable"); + List columnFamilies = Arrays.asList("cf1", "cf2", "cf3"); + Transaction txn = manager.beginWriteTransaction(tableName, + columnFamilies); + + List cfs = zkutil.getColumnFamiliesOfTable(tableName); + assertTrue(cfs.size() == columnFamilies.size()); + for (String cf : cfs) { + assertTrue(columnFamilies.contains(cf)); + } + + for (String colFamily : columnFamilies) { + String path = PathUtil.getRunningTxnInfoPath("/rm_base", tableName, colFamily); + byte[] data = zkutil.getRawData(path, null); + StoreFamilyRevisionList list = new StoreFamilyRevisionList(); + ZKUtil.deserialize(list, data); + assertEquals(list.getRevisionListSize(), 1); + StoreFamilyRevision lightTxn = list.getRevisionList().get(0); + assertEquals(lightTxn.timestamp, txn.getTransactionExpireTimeStamp()); + 
assertEquals(lightTxn.revision, txn.getRevisionNumber()); + + } + manager.commitWriteTransaction(txn); + for (String colFamiliy : columnFamilies) { + String path = PathUtil.getRunningTxnInfoPath("/rm_base", tableName, colFamiliy); + byte[] data = zkutil.getRawData(path, null); + StoreFamilyRevisionList list = new StoreFamilyRevisionList(); + ZKUtil.deserialize(list, data); + assertEquals(list.getRevisionListSize(), 0); + + } + + manager.close(); + } + + @Test + public void testAbortTransaction() throws IOException { + + int port = getHbaseConf().getInt("hbase.zookeeper.property.clientPort", 2181); + String host = getHbaseConf().get("hbase.zookeeper.quorum"); + Configuration conf = RevisionManagerConfiguration.create(getHbaseConf()); + conf.set(RMConstants.ZOOKEEPER_DATADIR, "/rm_base"); + ZKBasedRevisionManager manager = new ZKBasedRevisionManager(); + manager.initialize(conf); + manager.open(); + ZKUtil zkutil = new ZKUtil(host + ':' + port, "/rm_base"); + + String tableName = newTableName("testTable"); + List columnFamilies = Arrays.asList("cf1", "cf2", "cf3"); + Transaction txn = manager.beginWriteTransaction(tableName, columnFamilies); + List cfs = zkutil.getColumnFamiliesOfTable(tableName); + + assertTrue(cfs.size() == columnFamilies.size()); + for (String cf : cfs) { + assertTrue(columnFamilies.contains(cf)); + } + + for (String colFamiliy : columnFamilies) { + String path = PathUtil.getRunningTxnInfoPath("/rm_base", tableName, colFamiliy); + byte[] data = zkutil.getRawData(path, null); + StoreFamilyRevisionList list = new StoreFamilyRevisionList(); + ZKUtil.deserialize(list, data); + assertEquals(list.getRevisionListSize(), 1); + StoreFamilyRevision lightTxn = list.getRevisionList().get(0); + assertEquals(lightTxn.timestamp, txn.getTransactionExpireTimeStamp()); + assertEquals(lightTxn.revision, txn.getRevisionNumber()); + + } + manager.abortWriteTransaction(txn); + for (String colFamiliy : columnFamilies) { + String path = 
PathUtil.getRunningTxnInfoPath("/rm_base", tableName, colFamiliy); + byte[] data = zkutil.getRawData(path, null); + StoreFamilyRevisionList list = new StoreFamilyRevisionList(); + ZKUtil.deserialize(list, data); + assertEquals(list.getRevisionListSize(), 0); + + } + + for (String colFamiliy : columnFamilies) { + String path = PathUtil.getAbortInformationPath("/rm_base", tableName, colFamiliy); + byte[] data = zkutil.getRawData(path, null); + StoreFamilyRevisionList list = new StoreFamilyRevisionList(); + ZKUtil.deserialize(list, data); + assertEquals(list.getRevisionListSize(), 1); + StoreFamilyRevision abortedTxn = list.getRevisionList().get(0); + assertEquals(abortedTxn.getRevision(), txn.getRevisionNumber()); + } + manager.close(); + } + + @Test + public void testKeepAliveTransaction() throws InterruptedException, IOException { + + int port = getHbaseConf().getInt("hbase.zookeeper.property.clientPort", 2181); + String servers = getHbaseConf().get("hbase.zookeeper.quorum"); + String[] splits = servers.split(","); + StringBuffer sb = new StringBuffer(); + for (String split : splits) { + sb.append(split); + sb.append(':'); + sb.append(port); + } + + Configuration conf = RevisionManagerConfiguration.create(getHbaseConf()); + conf.set(RMConstants.ZOOKEEPER_DATADIR, "/rm_base"); + ZKBasedRevisionManager manager = new ZKBasedRevisionManager(); + manager.initialize(conf); + manager.open(); + String tableName = newTableName("testTable"); + List columnFamilies = Arrays.asList("cf1", "cf2"); + Transaction txn = manager.beginWriteTransaction(tableName, + columnFamilies, 40); + Thread.sleep(100); + try { + manager.commitWriteTransaction(txn); + } catch (Exception e) { + assertTrue(e instanceof IOException); + assertEquals(e.getMessage(), + "The transaction to be removed not found in the data."); + } + + } + + @Test + public void testCreateSnapshot() throws IOException { + int port = getHbaseConf().getInt("hbase.zookeeper.property.clientPort", 2181); + String host = 
getHbaseConf().get("hbase.zookeeper.quorum"); + Configuration conf = RevisionManagerConfiguration.create(getHbaseConf()); + conf.set(RMConstants.ZOOKEEPER_DATADIR, "/rm_base"); + ZKBasedRevisionManager manager = new ZKBasedRevisionManager(); + manager.initialize(conf); + manager.open(); + String tableName = newTableName("testTable"); + List cfOne = Arrays.asList("cf1", "cf2"); + List cfTwo = Arrays.asList("cf2", "cf3"); + Transaction tsx1 = manager.beginWriteTransaction(tableName, cfOne); + Transaction tsx2 = manager.beginWriteTransaction(tableName, cfTwo); + TableSnapshot snapshotOne = manager.createSnapshot(tableName); + assertEquals(snapshotOne.getRevision("cf1"), 0); + assertEquals(snapshotOne.getRevision("cf2"), 0); + assertEquals(snapshotOne.getRevision("cf3"), 1); + + List cfThree = Arrays.asList("cf1", "cf3"); + Transaction tsx3 = manager.beginWriteTransaction(tableName, cfThree); + manager.commitWriteTransaction(tsx1); + TableSnapshot snapshotTwo = manager.createSnapshot(tableName); + assertEquals(snapshotTwo.getRevision("cf1"), 2); + assertEquals(snapshotTwo.getRevision("cf2"), 1); + assertEquals(snapshotTwo.getRevision("cf3"), 1); + + manager.commitWriteTransaction(tsx2); + TableSnapshot snapshotThree = manager.createSnapshot(tableName); + assertEquals(snapshotThree.getRevision("cf1"), 2); + assertEquals(snapshotThree.getRevision("cf2"), 3); + assertEquals(snapshotThree.getRevision("cf3"), 2); + manager.commitWriteTransaction(tsx3); + TableSnapshot snapshotFour = manager.createSnapshot(tableName); + assertEquals(snapshotFour.getRevision("cf1"), 3); + assertEquals(snapshotFour.getRevision("cf2"), 3); + assertEquals(snapshotFour.getRevision("cf3"), 3); + + } + + +} diff --git hcatalog/storage-handlers/hbase/src/test/org/apache/hive/hcatalog/hbase/snapshot/TestRevisionManagerConfiguration.java hcatalog/storage-handlers/hbase/src/test/org/apache/hive/hcatalog/hbase/snapshot/TestRevisionManagerConfiguration.java new file mode 100644 index 0000000..38d5f42 --- 
/dev/null +++ hcatalog/storage-handlers/hbase/src/test/org/apache/hive/hcatalog/hbase/snapshot/TestRevisionManagerConfiguration.java @@ -0,0 +1,34 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.hbase.snapshot; + +import org.apache.hadoop.conf.Configuration; +import org.junit.Assert; +import org.junit.Test; + +public class TestRevisionManagerConfiguration { + + @Test + public void testDefault() { + Configuration conf = RevisionManagerConfiguration.create(); + Assert.assertEquals("org.apache.hive.hcatalog.hbase.snapshot.ZKBasedRevisionManager", + conf.get(RevisionManagerFactory.REVISION_MGR_IMPL_CLASS)); + } +} diff --git hcatalog/storage-handlers/hbase/src/test/org/apache/hive/hcatalog/hbase/snapshot/TestRevisionManagerEndpoint.java hcatalog/storage-handlers/hbase/src/test/org/apache/hive/hcatalog/hbase/snapshot/TestRevisionManagerEndpoint.java new file mode 100644 index 0000000..2afd463 --- /dev/null +++ hcatalog/storage-handlers/hbase/src/test/org/apache/hive/hcatalog/hbase/snapshot/TestRevisionManagerEndpoint.java @@ -0,0 +1,206 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.hbase.snapshot; + +import java.io.IOException; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; + +import org.apache.commons.lang.builder.ToStringBuilder; +import org.apache.commons.lang.builder.ToStringStyle; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.coprocessor.CoprocessorHost; +import org.apache.hive.hcatalog.hbase.SkeletonHBaseTest; +import org.junit.Assert; +import org.junit.Test; + +public class TestRevisionManagerEndpoint extends SkeletonHBaseTest { + + static { + // test case specific mini cluster settings + testConf = new Configuration(false); + testConf.setStrings(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY, + "org.apache.hive.hcatalog.hbase.snapshot.RevisionManagerEndpoint", + "org.apache.hadoop.hbase.coprocessor.GenericEndpoint"); + testConf.set(RMConstants.REVISION_MGR_ENDPOINT_IMPL_CLASS, MockRM.class.getName()); + } + + /** + * Mock implementation to test the protocol/serialization + */ + public static class MockRM implements RevisionManager { + + private static class Invocation { + Invocation(String methodName, Object ret, Object... 
args) { + this.methodName = methodName; + this.args = args; + this.ret = ret; + } + + String methodName; + Object[] args; + Object ret; + + private static boolean equals(Object obj1, Object obj2) { + if (obj1 == obj2) return true; + if (obj1 == null || obj2 == null) return false; + if (obj1 instanceof Transaction || obj1 instanceof TableSnapshot) { + return obj1.toString().equals(obj2.toString()); + } + return obj1.equals(obj2); + } + + @Override + public boolean equals(Object obj) { + Invocation other = (Invocation) obj; + if (this == other) return true; + if (other == null) return false; + if (this.args != other.args) { + if (this.args == null || other.args == null) return false; + if (this.args.length != other.args.length) return false; + for (int i = 0; i < args.length; i++) { + if (!equals(this.args[i], other.args[i])) return false; + } + } + return equals(this.ret, other.ret); + } + + @Override + public String toString() { + return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE). + append("method", this.methodName). + append("args", this.args). + append("returns", this.ret). + toString(); + } + } + + final static String DEFAULT_INSTANCE = "default"; + final static Map INSTANCES = new ConcurrentHashMap(); + Invocation lastCall; + boolean isOpen = false; + + private T recordCall(T result, Object... 
args) { + StackTraceElement[] stackTrace = Thread.currentThread().getStackTrace(); + lastCall = new Invocation(stackTrace[2].getMethodName(), result, args); + return result; + } + + @Override + public void initialize(Configuration conf) { + if (!INSTANCES.containsKey(DEFAULT_INSTANCE)) + INSTANCES.put(DEFAULT_INSTANCE, this); + } + + @Override + public void open() throws IOException { + isOpen = true; + } + + @Override + public void close() throws IOException { + isOpen = false; + } + + @Override + public void createTable(String table, List columnFamilies) throws IOException { + } + + @Override + public void dropTable(String table) throws IOException { + } + + @Override + public Transaction beginWriteTransaction(String table, + List families) throws IOException { + return recordCall(null, table, families); + } + + @Override + public Transaction beginWriteTransaction(String table, + List families, long keepAlive) throws IOException { + return recordCall(null, table, families, keepAlive); + } + + @Override + public void commitWriteTransaction(Transaction transaction) + throws IOException { + } + + @Override + public void abortWriteTransaction(Transaction transaction) + throws IOException { + } + + @Override + public List getAbortedWriteTransactions(String table, + String columnFamily) throws IOException { + return null; + } + + @Override + public TableSnapshot createSnapshot(String tableName) + throws IOException { + return null; + } + + @Override + public TableSnapshot createSnapshot(String tableName, long revision) + throws IOException { + TableSnapshot ret = new TableSnapshot(tableName, new HashMap(), revision); + return recordCall(ret, tableName, revision); + } + + @Override + public void keepAlive(Transaction transaction) throws IOException { + recordCall(null, transaction); + } + } + + @Test + public void testRevisionManagerProtocol() throws Throwable { + + Configuration conf = getHbaseConf(); + RevisionManager rm = 
RevisionManagerFactory.getOpenedRevisionManager( + RevisionManagerEndpointClient.class.getName(), conf); + + MockRM mockImpl = MockRM.INSTANCES.get(MockRM.DEFAULT_INSTANCE); + Assert.assertNotNull(mockImpl); + Assert.assertTrue(mockImpl.isOpen); + + Transaction t = new Transaction("t1", Arrays.asList("f1", "f2"), 0, 0); + MockRM.Invocation call = new MockRM.Invocation("keepAlive", null, t); + rm.keepAlive(t); + Assert.assertEquals(call.methodName, call, mockImpl.lastCall); + + t = new Transaction("t2", Arrays.asList("f21", "f22"), 0, 0); + call = new MockRM.Invocation("beginWriteTransaction", null, t.getTableName(), t.getColumnFamilies()); + call.ret = rm.beginWriteTransaction(t.getTableName(), t.getColumnFamilies()); + Assert.assertEquals(call.methodName, call, mockImpl.lastCall); + + call = new MockRM.Invocation("createSnapshot", null, "t3", 1L); + call.ret = rm.createSnapshot("t3", 1); + Assert.assertEquals(call.methodName, call, mockImpl.lastCall); + + } + +} diff --git hcatalog/storage-handlers/hbase/src/test/org/apache/hive/hcatalog/hbase/snapshot/TestThriftSerialization.java hcatalog/storage-handlers/hbase/src/test/org/apache/hive/hcatalog/hbase/snapshot/TestThriftSerialization.java new file mode 100644 index 0000000..8cd4e84 --- /dev/null +++ hcatalog/storage-handlers/hbase/src/test/org/apache/hive/hcatalog/hbase/snapshot/TestThriftSerialization.java @@ -0,0 +1,85 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.hbase.snapshot; + +import static org.junit.Assert.assertTrue; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + +import org.apache.hive.hcatalog.hbase.snapshot.transaction.thrift.StoreFamilyRevision; +import org.apache.hive.hcatalog.hbase.snapshot.transaction.thrift.StoreFamilyRevisionList; +import org.junit.Test; + +public class TestThriftSerialization { + + @Test + public void testLightWeightTransaction() { + StoreFamilyRevision trxn = new StoreFamilyRevision(0, 1000); + try { + + byte[] data = ZKUtil.serialize(trxn); + StoreFamilyRevision newWtx = new StoreFamilyRevision(); + ZKUtil.deserialize(newWtx, data); + + assertTrue(newWtx.getRevision() == trxn.getRevision()); + assertTrue(newWtx.getTimestamp() == trxn.getTimestamp()); + + } catch (IOException e) { + e.printStackTrace(); + } + } + + @Test + public void testWriteTransactionList() { + List txnList = new ArrayList(); + long version; + long timestamp; + for (int i = 0; i < 10; i++) { + version = i; + timestamp = 1000 + i; + StoreFamilyRevision wtx = new StoreFamilyRevision(version, timestamp); + txnList.add(wtx); + } + + StoreFamilyRevisionList wList = new StoreFamilyRevisionList(txnList); + + try { + byte[] data = ZKUtil.serialize(wList); + StoreFamilyRevisionList newList = new StoreFamilyRevisionList(); + ZKUtil.deserialize(newList, data); + assertTrue(newList.getRevisionListSize() == wList.getRevisionListSize()); + + Iterator itr = newList.getRevisionListIterator(); + int i = 0; + while 
(itr.hasNext()) { + StoreFamilyRevision txn = itr.next(); + assertTrue(txn.getRevision() == i); + assertTrue(txn.getTimestamp() == (i + 1000)); + i++; + } + + } catch (IOException e) { + e.printStackTrace(); + } + } + +} diff --git hcatalog/storage-handlers/hbase/src/test/org/apache/hive/hcatalog/hbase/snapshot/TestZNodeSetUp.java hcatalog/storage-handlers/hbase/src/test/org/apache/hive/hcatalog/hbase/snapshot/TestZNodeSetUp.java new file mode 100644 index 0000000..7d051a2 --- /dev/null +++ hcatalog/storage-handlers/hbase/src/test/org/apache/hive/hcatalog/hbase/snapshot/TestZNodeSetUp.java @@ -0,0 +1,120 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hive.hcatalog.hbase.snapshot; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.net.URI; +import java.util.Map; + +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HBaseConfiguration; +import org.apache.hadoop.hbase.client.HBaseAdmin; +import org.apache.hadoop.hive.cli.CliSessionState; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; +import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.hive.hcatalog.cli.HCatDriver; +import org.apache.hive.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzer; +import org.apache.hive.hcatalog.hbase.SkeletonHBaseTest; +import org.apache.zookeeper.ZooKeeper; +import org.apache.zookeeper.data.Stat; +import org.junit.Test; + + +public class TestZNodeSetUp extends SkeletonHBaseTest { + + private static HiveConf hcatConf; + private static HCatDriver hcatDriver; + + public void Initialize() throws Exception { + + hcatConf = getHiveConf(); + hcatConf.set(ConfVars.SEMANTIC_ANALYZER_HOOK.varname, + HCatSemanticAnalyzer.class.getName()); + URI fsuri = getFileSystem().getUri(); + Path whPath = new Path(fsuri.getScheme(), fsuri.getAuthority(), + getTestDir()); + hcatConf.set(HiveConf.ConfVars.HADOOPFS.varname, fsuri.toString()); + hcatConf.set(ConfVars.METASTOREWAREHOUSE.varname, whPath.toString()); + + //Add hbase properties + + for (Map.Entry el : getHbaseConf()) { + if (el.getKey().startsWith("hbase.")) { + hcatConf.set(el.getKey(), el.getValue()); + } + } + HBaseConfiguration.merge(hcatConf, + RevisionManagerConfiguration.create()); + hcatConf.set(RMConstants.ZOOKEEPER_DATADIR, "/rm_base"); + SessionState.start(new CliSessionState(hcatConf)); + hcatDriver = new HCatDriver(); + + } + + @Test + public void testBasicZNodeCreation() throws Exception { + + Initialize(); + int port = 
getHbaseConf().getInt("hbase.zookeeper.property.clientPort", 2181); + String servers = getHbaseConf().get("hbase.zookeeper.quorum"); + String[] splits = servers.split(","); + StringBuffer sb = new StringBuffer(); + for (String split : splits) { + sb.append(split); + sb.append(':'); + sb.append(port); + } + + hcatDriver.run("drop table test_table"); + CommandProcessorResponse response = hcatDriver + .run("create table test_table(key int, value string) STORED BY " + + "'org.apache.hive.hcatalog.hbase.HBaseHCatStorageHandler'" + + "TBLPROPERTIES ('hbase.columns.mapping'=':key,cf1:val')"); + + assertEquals(0, response.getResponseCode()); + + HBaseAdmin hAdmin = new HBaseAdmin(getHbaseConf()); + boolean doesTableExist = hAdmin.tableExists("test_table"); + assertTrue(doesTableExist); + + + ZKUtil zkutil = new ZKUtil(sb.toString(), "/rm_base"); + ZooKeeper zk = zkutil.getSession(); + String tablePath = PathUtil.getTxnDataPath("/rm_base", "test_table"); + Stat tempTwo = zk.exists(tablePath, false); + assertTrue(tempTwo != null); + + String cfPath = PathUtil.getTxnDataPath("/rm_base", "test_table") + "/cf1"; + Stat tempThree = zk.exists(cfPath, false); + assertTrue(tempThree != null); + + hcatDriver.run("drop table test_table"); + + System.out.println("Table path : " + tablePath); + Stat tempFour = zk.exists(tablePath, false); + assertTrue(tempFour == null); + + } + +} diff --git hcatalog/storage-handlers/hbase/src/test/org/apache/hive/hcatalog/hbase/snapshot/lock/TestWriteLock.java hcatalog/storage-handlers/hbase/src/test/org/apache/hive/hcatalog/hbase/snapshot/lock/TestWriteLock.java new file mode 100644 index 0000000..4cb8478 --- /dev/null +++ hcatalog/storage-handlers/hbase/src/test/org/apache/hive/hcatalog/hbase/snapshot/lock/TestWriteLock.java @@ -0,0 +1,161 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.hbase.snapshot.lock; + +import org.apache.zookeeper.ZooKeeper; +import org.apache.zookeeper.test.ClientBase; + +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; + +import org.junit.After; +import org.junit.Assert; +import org.junit.Test; + +/** + * test for writelock + * This class is taken from the zookeeper 3.4.0 as-is to test the zookeeper lock + * Recipe with a change in the package name. 
+ */ +public class TestWriteLock extends ClientBase { + protected int sessionTimeout = 10 * 1000; + protected String dir = "/" + getClass().getName(); + protected WriteLock[] nodes; + protected CountDownLatch latch = new CountDownLatch(1); + private boolean restartServer = true; + private boolean workAroundClosingLastZNodeFails = true; + private boolean killLeader = true; + + @Test + public void testRun() throws Exception { + runTest(3); + } + + class LockCallback implements LockListener { + public void lockAcquired() { + latch.countDown(); + } + + public void lockReleased() { + + } + + } + + protected void runTest(int count) throws Exception { + nodes = new WriteLock[count]; + for (int i = 0; i < count; i++) { + ZooKeeper keeper = createClient(); + WriteLock leader = new WriteLock(keeper, dir, null); + leader.setLockListener(new LockCallback()); + nodes[i] = leader; + + leader.lock(); + } + + // lets wait for any previous leaders to die and one of our new + // nodes to become the new leader + latch.await(30, TimeUnit.SECONDS); + + WriteLock first = nodes[0]; + dumpNodes(count); + + // lets assert that the first election is the leader + Assert.assertTrue("The first znode should be the leader " + first.getId(), first.isOwner()); + + for (int i = 1; i < count; i++) { + WriteLock node = nodes[i]; + Assert.assertFalse("Node should not be the leader " + node.getId(), node.isOwner()); + } + + if (count > 1) { + if (killLeader) { + System.out.println("Now killing the leader"); + // now lets kill the leader + latch = new CountDownLatch(1); + first.unlock(); + latch.await(30, TimeUnit.SECONDS); + //Thread.sleep(10000); + WriteLock second = nodes[1]; + dumpNodes(count); + // lets assert that the first election is the leader + Assert.assertTrue("The second znode should be the leader " + second.getId(), second.isOwner()); + + for (int i = 2; i < count; i++) { + WriteLock node = nodes[i]; + Assert.assertFalse("Node should not be the leader " + node.getId(), node.isOwner()); + } 
+ } + + + if (restartServer) { + // now lets stop the server + System.out.println("Now stopping the server"); + stopServer(); + Thread.sleep(10000); + + // TODO lets assert that we are no longer the leader + dumpNodes(count); + + System.out.println("Starting the server"); + startServer(); + Thread.sleep(10000); + + for (int i = 0; i < count - 1; i++) { + System.out.println("Calling acquire for node: " + i); + nodes[i].lock(); + } + dumpNodes(count); + System.out.println("Now closing down..."); + } + } + } + + protected void dumpNodes(int count) { + for (int i = 0; i < count; i++) { + WriteLock node = nodes[i]; + System.out.println("node: " + i + " id: " + + node.getId() + " is leader: " + node.isOwner()); + } + } + + @After + public void tearDown() throws Exception { + if (nodes != null) { + for (int i = 0; i < nodes.length; i++) { + WriteLock node = nodes[i]; + if (node != null) { + System.out.println("Closing node: " + i); + node.close(); + if (workAroundClosingLastZNodeFails && i == nodes.length - 1) { + System.out.println("Not closing zookeeper: " + i + " due to bug!"); + } else { + System.out.println("Closing zookeeper: " + i); + node.getZookeeper().close(); + System.out.println("Closed zookeeper: " + i); + } + } + } + } + System.out.println("Now lets stop the server"); + super.tearDown(); + + } +} diff --git hcatalog/storage-handlers/hbase/src/test/org/apache/hive/hcatalog/hbase/snapshot/lock/TestZNodeName.java hcatalog/storage-handlers/hbase/src/test/org/apache/hive/hcatalog/hbase/snapshot/lock/TestZNodeName.java new file mode 100644 index 0000000..3780e23 --- /dev/null +++ hcatalog/storage-handlers/hbase/src/test/org/apache/hive/hcatalog/hbase/snapshot/lock/TestZNodeName.java @@ -0,0 +1,62 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.hbase.snapshot.lock; + +import junit.framework.TestCase; + +import java.util.SortedSet; +import java.util.TreeSet; + +import org.junit.Test; + +/** + * test for znodenames. This class is taken as-is from zookeeper lock recipe test. + * The package name has been changed. + */ +public class TestZNodeName extends TestCase { + @Test + public void testOrderWithSamePrefix() throws Exception { + String[] names = { "x-3", "x-5", "x-11", "x-1" }; + String[] expected = { "x-1", "x-3", "x-5", "x-11" }; + assertOrderedNodeNames(names, expected); + } + @Test + public void testOrderWithDifferentPrefixes() throws Exception { + String[] names = { "r-3", "r-2", "r-1", "w-2", "w-1" }; + String[] expected = { "r-1", "r-2", "r-3", "w-1", "w-2" }; + assertOrderedNodeNames(names, expected); + } + + protected void assertOrderedNodeNames(String[] names, String[] expected) { + int size = names.length; + assertEquals("The two arrays should be the same size!", names.length, expected.length); + SortedSet nodeNames = new TreeSet(); + for (String name : names) { + nodeNames.add(new ZNodeName(name)); + } + + int index = 0; + for (ZNodeName nodeName : nodeNames) { + String name = nodeName.getName(); + assertEquals("Node " + index, expected[index++], name); + } + } + +} diff --git hcatalog/webhcat/java-client/pom.xml hcatalog/webhcat/java-client/pom.xml index d794f9f..450bec1 
100644 --- hcatalog/webhcat/java-client/pom.xml +++ hcatalog/webhcat/java-client/pom.xml @@ -22,14 +22,13 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"> - org.apache.hcatalog + org.apache.hive.hcatalog hcatalog 0.12.0-SNAPSHOT ../../pom.xml 4.0.0 - org.apache.hcatalog webhcat-java-client jar webhcat-java-client @@ -37,7 +36,7 @@ - org.apache.hcatalog + org.apache.hive.hcatalog hcatalog-core ${hcatalog.version} compile diff --git hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/ConnectionFailureException.java hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/ConnectionFailureException.java deleted file mode 100644 index 9ecb416..0000000 --- hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/ConnectionFailureException.java +++ /dev/null @@ -1,39 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.api; - -import org.apache.hcatalog.common.HCatException; -/** - * Class representing exceptions resulting from connection problems - * between HCat client and server. 
- */ -public class ConnectionFailureException extends HCatException { - - private static final long serialVersionUID = 1L; - - /** - * @param message Exception message. - * @param cause The wrapped Throwable that caused this exception. - */ - public ConnectionFailureException(String message, Throwable cause) { - super(message, cause); - } - -} diff --git hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/HCatAddPartitionDesc.java hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/HCatAddPartitionDesc.java deleted file mode 100644 index e02043a..0000000 --- hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/HCatAddPartitionDesc.java +++ /dev/null @@ -1,185 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.hcatalog.api; - -import java.util.ArrayList; -import java.util.List; -import java.util.Map; - -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.metastore.MetaStoreUtils; -import org.apache.hadoop.hive.metastore.Warehouse; -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.api.Partition; -import org.apache.hadoop.hive.metastore.api.StorageDescriptor; -import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.hcatalog.common.HCatException; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * The Class HCatAddPartitionDesc helps users in defining partition attributes. - */ -public class HCatAddPartitionDesc { - - private static final Logger LOG = LoggerFactory.getLogger(HCatAddPartitionDesc.class); - private String tableName; - private String dbName; - private String location; - private Map partSpec; - - private HCatAddPartitionDesc(String dbName, String tbl, String loc, Map spec) { - this.dbName = dbName; - this.tableName = tbl; - this.location = loc; - this.partSpec = spec; - } - - /** - * Gets the location. - * - * @return the location - */ - public String getLocation() { - return this.location; - } - - - /** - * Gets the partition spec. - * - * @return the partition spec - */ - public Map getPartitionSpec() { - return this.partSpec; - } - - /** - * Gets the table name. - * - * @return the table name - */ - public String getTableName() { - return this.tableName; - } - - /** - * Gets the database name. - * - * @return the database name - */ - public String getDatabaseName() { - return this.dbName; - } - - @Override - public String toString() { - return "HCatAddPartitionDesc [" - + (tableName != null ? "tableName=" + tableName + ", " : "tableName=null") - + (dbName != null ? "dbName=" + dbName + ", " : "dbName=null") - + (location != null ? 
"location=" + location + ", " : "location=null") - + (partSpec != null ? "partSpec=" + partSpec : "partSpec=null") + "]"; - } - - /** - * Creates the builder for specifying attributes. - * - * @param dbName the db name - * @param tableName the table name - * @param location the location - * @param partSpec the part spec - * @return the builder - * @throws HCatException - */ - public static Builder create(String dbName, String tableName, String location, - Map partSpec) throws HCatException { - return new Builder(dbName, tableName, location, partSpec); - } - - Partition toHivePartition(Table hiveTable) throws HCatException { - Partition hivePtn = new Partition(); - hivePtn.setDbName(this.dbName); - hivePtn.setTableName(this.tableName); - - List pvals = new ArrayList(); - for (FieldSchema field : hiveTable.getPartitionKeys()) { - String val = partSpec.get(field.getName()); - if (val == null || val.length() == 0) { - throw new HCatException("create partition: Value for key " - + field.getName() + " is null or empty"); - } - pvals.add(val); - } - - hivePtn.setValues(pvals); - StorageDescriptor sd = new StorageDescriptor(hiveTable.getSd()); - hivePtn.setSd(sd); - hivePtn.setParameters(hiveTable.getParameters()); - if (this.location != null) { - hivePtn.getSd().setLocation(this.location); - } else { - String partName; - try { - partName = Warehouse.makePartName( - hiveTable.getPartitionKeys(), pvals); - LOG.info("Setting partition location to :" + partName); - } catch (MetaException e) { - throw new HCatException("Exception while creating partition name.", e); - } - Path partPath = new Path(hiveTable.getSd().getLocation(), partName); - hivePtn.getSd().setLocation(partPath.toString()); - } - hivePtn.setCreateTime((int) (System.currentTimeMillis() / 1000)); - hivePtn.setLastAccessTimeIsSet(false); - return hivePtn; - } - - public static class Builder { - - private String tableName; - private String location; - private Map values; - private String dbName; - - private 
Builder(String dbName, String tableName, String location, Map values) { - this.dbName = dbName; - this.tableName = tableName; - this.location = location; - this.values = values; - } - - /** - * Builds the HCatAddPartitionDesc. - * - * @return the h cat add partition desc - * @throws HCatException - */ - public HCatAddPartitionDesc build() throws HCatException { - if (this.dbName == null) { - this.dbName = MetaStoreUtils.DEFAULT_DATABASE_NAME; - } - HCatAddPartitionDesc desc = new HCatAddPartitionDesc( - this.dbName, this.tableName, this.location, - this.values); - return desc; - } - } - -} diff --git hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/HCatClient.java hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/HCatClient.java deleted file mode 100644 index 8bd3a68..0000000 --- hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/HCatClient.java +++ /dev/null @@ -1,361 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.hcatalog.api; - -import java.util.List; -import java.util.Map; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.common.JavaUtils; -import org.apache.hadoop.hive.metastore.api.PartitionEventType; -import org.apache.hcatalog.common.HCatException; -import org.apache.hcatalog.data.schema.HCatFieldSchema; - -/** - * The abstract class HCatClient containing APIs for HCatalog DDL commands. - */ -public abstract class HCatClient { - - public enum DropDBMode {RESTRICT, CASCADE} - - public static final String HCAT_CLIENT_IMPL_CLASS = "hcat.client.impl.class"; - - /** - * Creates an instance of HCatClient. - * - * @param conf An instance of configuration. - * @return An instance of HCatClient. - * @throws HCatException - */ - public static HCatClient create(Configuration conf) throws HCatException { - HCatClient client = null; - String className = conf.get(HCAT_CLIENT_IMPL_CLASS, - HCatClientHMSImpl.class.getName()); - try { - Class clientClass = Class.forName(className, - true, JavaUtils.getClassLoader()).asSubclass( - HCatClient.class); - client = (HCatClient) clientClass.newInstance(); - } catch (ClassNotFoundException e) { - throw new HCatException( - "ClassNotFoundException while creating client class.", e); - } catch (InstantiationException e) { - throw new HCatException( - "InstantiationException while creating client class.", e); - } catch (IllegalAccessException e) { - throw new HCatException( - "IllegalAccessException while creating client class.", e); - } - if (client != null) { - client.initialize(conf); - } - return client; - } - - abstract void initialize(Configuration conf) throws HCatException; - - /** - * Get all existing databases that match the given - * pattern. 
The matching occurs as per Java regular expressions - * - * @param pattern java re pattern - * @return list of database names - * @throws HCatException - */ - public abstract List listDatabaseNamesByPattern(String pattern) - throws HCatException; - - /** - * Gets the database. - * - * @param dbName The name of the database. - * @return An instance of HCatDatabaseInfo. - * @throws HCatException - */ - public abstract HCatDatabase getDatabase(String dbName) throws HCatException; - - /** - * Creates the database. - * - * @param dbInfo An instance of HCatCreateDBDesc. - * @throws HCatException - */ - public abstract void createDatabase(HCatCreateDBDesc dbInfo) - throws HCatException; - - /** - * Drops a database. - * - * @param dbName The name of the database to delete. - * @param ifExists Hive returns an error if the database specified does not exist, - * unless ifExists is set to true. - * @param mode This is set to either "restrict" or "cascade". Restrict will - * remove the schema if all the tables are empty. Cascade removes - * everything including data and definitions. - * @throws HCatException - */ - public abstract void dropDatabase(String dbName, boolean ifExists, - DropDBMode mode) throws HCatException; - - /** - * Returns all existing tables from the specified database which match the given - * pattern. The matching occurs as per Java regular expressions. - * @param dbName The name of the DB (to be searched) - * @param tablePattern The regex for the table-name - * @return list of table names - * @throws HCatException - */ - public abstract List listTableNamesByPattern(String dbName, String tablePattern) - throws HCatException; - - /** - * Gets the table. - * - * @param dbName The name of the database. - * @param tableName The name of the table. - * @return An instance of HCatTableInfo. - * @throws HCatException - */ - public abstract HCatTable getTable(String dbName, String tableName) - throws HCatException; - - /** - * Creates the table. 
- * - * @param createTableDesc An instance of HCatCreateTableDesc class. - * @throws HCatException - */ - public abstract void createTable(HCatCreateTableDesc createTableDesc) throws HCatException; - - /** - * Updates the Table's column schema to the specified definition. - * - * @param dbName The name of the database. - * @param tableName The name of the table. - * @param columnSchema The (new) definition of the column schema (i.e. list of fields). - * - */ - public abstract void updateTableSchema(String dbName, String tableName, List columnSchema) - throws HCatException; - - /** - * Creates the table like an existing table. - * - * @param dbName The name of the database. - * @param existingTblName The name of the existing table. - * @param newTableName The name of the new table. - * @param ifNotExists If true, then error related to already table existing is skipped. - * @param isExternal Set to "true", if table has be created at a different - * location other than default. - * @param location The location for the table. - * @throws HCatException - */ - public abstract void createTableLike(String dbName, String existingTblName, - String newTableName, boolean ifNotExists, boolean isExternal, - String location) throws HCatException; - - /** - * Drop table. - * - * @param dbName The name of the database. - * @param tableName The name of the table. - * @param ifExists Hive returns an error if the database specified does not exist, - * unless ifExists is set to true. - * @throws HCatException - */ - public abstract void dropTable(String dbName, String tableName, - boolean ifExists) throws HCatException; - - /** - * Renames a table. - * - * @param dbName The name of the database. - * @param oldName The name of the table to be renamed. - * @param newName The new name of the table. - * @throws HCatException - */ - public abstract void renameTable(String dbName, String oldName, - String newName) throws HCatException; - - /** - * Gets all the partitions. 
- * - * @param dbName The name of the database. - * @param tblName The name of the table. - * @return A list of partitions. - * @throws HCatException - */ - public abstract List getPartitions(String dbName, String tblName) - throws HCatException; - - /** - * Gets all the partitions that match the specified (and possibly partial) partition specification. - * A partial partition-specification is one where not all partition-keys have associated values. For example, - * for a table ('myDb.myTable') with 2 partition keys (dt string, region string), - * if for each dt ('20120101', '20120102', etc.) there can exist 3 regions ('us', 'uk', 'in'), then, - * 1. Complete partition spec: getPartitions('myDb', 'myTable', {dt='20120101', region='us'}) would return 1 partition. - * 2. Partial partition spec: getPartitions('myDb', 'myTable', {dt='20120101'}) would return all 3 partitions, - * with dt='20120101' (i.e. region = 'us', 'uk' and 'in'). - * @param dbName The name of the database. - * @param tblName The name of the table. - * @param partitionSpec The partition specification. (Need not include all partition keys.) - * @return A list of partitions. - * @throws HCatException - */ - public abstract List getPartitions(String dbName, String tblName, Map partitionSpec) - throws HCatException; - - /** - * Gets the partition. - * - * @param dbName The database name. - * @param tableName The table name. - * @param partitionSpec The partition specification, {[col_name,value],[col_name2,value2]}. All partition-key-values - * must be specified. - * @return An instance of HCatPartitionInfo. - * @throws HCatException - */ - public abstract HCatPartition getPartition(String dbName, String tableName, - Map partitionSpec) throws HCatException; - - /** - * Adds the partition. - * - * @param partInfo An instance of HCatAddPartitionDesc. 
- * @throws HCatException - */ - public abstract void addPartition(HCatAddPartitionDesc partInfo) - throws HCatException; - - /** - * Adds a list of partitions. - * - * @param partInfoList A list of HCatAddPartitionDesc. - * @return The number of partitions added. - * @throws HCatException - */ - public abstract int addPartitions(List partInfoList) - throws HCatException; - - /** - * Drops partition(s) that match the specified (and possibly partial) partition specification. - * A partial partition-specification is one where not all partition-keys have associated values. For example, - * for a table ('myDb.myTable') with 2 partition keys (dt string, region string), - * if for each dt ('20120101', '20120102', etc.) there can exist 3 regions ('us', 'uk', 'in'), then, - * 1. Complete partition spec: dropPartitions('myDb', 'myTable', {dt='20120101', region='us'}) would drop 1 partition. - * 2. Partial partition spec: dropPartitions('myDb', 'myTable', {dt='20120101'}) would drop all 3 partitions, - * with dt='20120101' (i.e. region = 'us', 'uk' and 'in'). - * @param dbName The database name. - * @param tableName The table name. - * @param partitionSpec The partition specification, {[col_name,value],[col_name2,value2]}. - * @param ifExists Hive returns an error if the partition specified does not exist, unless ifExists is set to true. - * @throws HCatException,ConnectionFailureException - */ - public abstract void dropPartitions(String dbName, String tableName, - Map partitionSpec, boolean ifExists) - throws HCatException; - - /** - * List partitions by filter. - * - * @param dbName The database name. - * @param tblName The table name. - * @param filter The filter string, - * for example "part1 = \"p1_abc\" and part2 <= "\p2_test\"". Filtering can - * be done only on string partition keys. 
- * @return list of partitions - * @throws HCatException - */ - public abstract List listPartitionsByFilter(String dbName, String tblName, - String filter) throws HCatException; - - /** - * Mark partition for event. - * - * @param dbName The database name. - * @param tblName The table name. - * @param partKVs the key-values associated with the partition. - * @param eventType the event type - * @throws HCatException - */ - public abstract void markPartitionForEvent(String dbName, String tblName, - Map partKVs, PartitionEventType eventType) - throws HCatException; - - /** - * Checks if a partition is marked for event. - * - * @param dbName the db name - * @param tblName the table name - * @param partKVs the key-values associated with the partition. - * @param eventType the event type - * @return true, if is partition marked for event - * @throws HCatException - */ - public abstract boolean isPartitionMarkedForEvent(String dbName, String tblName, - Map partKVs, PartitionEventType eventType) - throws HCatException; - - /** - * Gets the delegation token. - * - * @param owner the owner - * @param renewerKerberosPrincipalName the renewer kerberos principal name - * @return the delegation token - * @throws HCatException,ConnectionFailureException - */ - public abstract String getDelegationToken(String owner, - String renewerKerberosPrincipalName) throws HCatException; - - /** - * Renew delegation token. - * - * @param tokenStrForm the token string - * @return the new expiration time - * @throws HCatException - */ - public abstract long renewDelegationToken(String tokenStrForm) - throws HCatException; - - /** - * Cancel delegation token. - * - * @param tokenStrForm the token string - * @throws HCatException - */ - public abstract void cancelDelegationToken(String tokenStrForm) - throws HCatException; - - /** - * Retrieve Message-bus topic for a table. - * - * @param dbName The name of the DB. - * @param tableName The name of the table. 
- * @return Topic-name for the message-bus on which messages will be sent for the specified table. - * By default, this is set to .. Returns null when not set. - */ - public abstract String getMessageBusTopicName(String dbName, String tableName) throws HCatException; - - /** - * Close the hcatalog client. - * - * @throws HCatException - */ - public abstract void close() throws HCatException; -} diff --git hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/HCatClientHMSImpl.java hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/HCatClientHMSImpl.java deleted file mode 100644 index 3a1b9c5..0000000 --- hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/HCatClientHMSImpl.java +++ /dev/null @@ -1,723 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.hcatalog.api; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; - -import org.apache.commons.lang.StringUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; -import org.apache.hadoop.hive.metastore.MetaStoreUtils; -import org.apache.hadoop.hive.metastore.TableType; -import org.apache.hadoop.hive.metastore.api.AlreadyExistsException; -import org.apache.hadoop.hive.metastore.api.Database; -import org.apache.hadoop.hive.metastore.api.InvalidObjectException; -import org.apache.hadoop.hive.metastore.api.InvalidOperationException; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; -import org.apache.hadoop.hive.metastore.api.Partition; -import org.apache.hadoop.hive.metastore.api.PartitionEventType; -import org.apache.hadoop.hive.metastore.api.StorageDescriptor; -import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.hadoop.hive.metastore.api.UnknownDBException; -import org.apache.hadoop.hive.metastore.api.UnknownTableException; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.common.HCatException; -import org.apache.hcatalog.common.HCatUtil; -import org.apache.hcatalog.data.schema.HCatFieldSchema; -import org.apache.hcatalog.data.schema.HCatSchemaUtils; -import org.apache.thrift.TException; - -/** - * The HCatClientHMSImpl is the Hive Metastore client based implementation of - * HCatClient. 
- */ -public class HCatClientHMSImpl extends HCatClient { - - private HiveMetaStoreClient hmsClient; - private Configuration config; - private HiveConf hiveConfig; - - @Override - public List listDatabaseNamesByPattern(String pattern) - throws HCatException { - List dbNames = null; - try { - dbNames = hmsClient.getDatabases(pattern); - } catch (MetaException exp) { - throw new HCatException("MetaException while listing db names", exp); - } - return dbNames; - } - - @Override - public HCatDatabase getDatabase(String dbName) throws HCatException { - HCatDatabase db = null; - try { - Database hiveDB = hmsClient.getDatabase(checkDB(dbName)); - if (hiveDB != null) { - db = new HCatDatabase(hiveDB); - } - } catch (NoSuchObjectException exp) { - throw new ObjectNotFoundException( - "NoSuchObjectException while fetching database", exp); - } catch (MetaException exp) { - throw new HCatException("MetaException while fetching database", - exp); - } catch (TException exp) { - throw new ConnectionFailureException( - "TException while fetching database", exp); - } - return db; - } - - @Override - public void createDatabase(HCatCreateDBDesc dbInfo) throws HCatException { - try { - hmsClient.createDatabase(dbInfo.toHiveDb()); - } catch (AlreadyExistsException exp) { - if (!dbInfo.getIfNotExists()) { - throw new HCatException( - "AlreadyExistsException while creating database", exp); - } - } catch (InvalidObjectException exp) { - throw new HCatException( - "InvalidObjectException while creating database", exp); - } catch (MetaException exp) { - throw new HCatException("MetaException while creating database", - exp); - } catch (TException exp) { - throw new ConnectionFailureException( - "TException while creating database", exp); - } - } - - @Override - public void dropDatabase(String dbName, boolean ifExists, DropDBMode mode) - throws HCatException { - boolean isCascade = mode.toString().equalsIgnoreCase("cascade"); - try { - hmsClient.dropDatabase(checkDB(dbName), true, ifExists, 
isCascade); - } catch (NoSuchObjectException e) { - if (!ifExists) { - throw new ObjectNotFoundException( - "NoSuchObjectException while dropping db.", e); - } - } catch (InvalidOperationException e) { - throw new HCatException( - "InvalidOperationException while dropping db.", e); - } catch (MetaException e) { - throw new HCatException("MetaException while dropping db.", e); - } catch (TException e) { - throw new ConnectionFailureException("TException while dropping db.", - e); - } - } - - @Override - public List listTableNamesByPattern(String dbName, - String tablePattern) throws HCatException { - List tableNames = null; - try { - tableNames = hmsClient.getTables(checkDB(dbName), tablePattern); - } catch (MetaException e) { - throw new HCatException( - "MetaException while fetching table names.", e); - } - return tableNames; - } - - @Override - public HCatTable getTable(String dbName, String tableName) - throws HCatException { - HCatTable table = null; - try { - Table hiveTable = hmsClient.getTable(checkDB(dbName), tableName); - if (hiveTable != null) { - table = new HCatTable(hiveTable); - } - } catch (MetaException e) { - throw new HCatException("MetaException while fetching table.", e); - } catch (NoSuchObjectException e) { - throw new ObjectNotFoundException( - "NoSuchObjectException while fetching table.", e); - } catch (TException e) { - throw new ConnectionFailureException( - "TException while fetching table.", e); - } - return table; - } - - @Override - public void createTable(HCatCreateTableDesc createTableDesc) - throws HCatException { - try { - hmsClient.createTable(createTableDesc.toHiveTable(hiveConfig)); - } catch (AlreadyExistsException e) { - if (!createTableDesc.getIfNotExists()) { - throw new HCatException( - "AlreadyExistsException while creating table.", e); - } - } catch (InvalidObjectException e) { - throw new HCatException( - "InvalidObjectException while creating table.", e); - } catch (MetaException e) { - throw new 
HCatException("MetaException while creating table.", e); - } catch (NoSuchObjectException e) { - throw new ObjectNotFoundException( - "NoSuchObjectException while creating table.", e); - } catch (TException e) { - throw new ConnectionFailureException( - "TException while creating table.", e); - } catch (IOException e) { - throw new HCatException("IOException while creating hive conf.", e); - } - - } - - @Override - public void updateTableSchema(String dbName, String tableName, List columnSchema) - throws HCatException { - try { - Table table = hmsClient.getTable(dbName, tableName); - table.getSd().setCols(HCatSchemaUtils.getFieldSchemas(columnSchema)); - hmsClient.alter_table(dbName, tableName, table); - } - catch (InvalidOperationException e) { - throw new HCatException("InvalidOperationException while updating table schema.", e); - } - catch (MetaException e) { - throw new HCatException("MetaException while updating table schema.", e); - } - catch (NoSuchObjectException e) { - throw new ObjectNotFoundException( - "NoSuchObjectException while updating table schema.", e); - } - catch (TException e) { - throw new ConnectionFailureException( - "TException while updating table schema.", e); - } - } - - @Override - public void createTableLike(String dbName, String existingTblName, - String newTableName, boolean ifNotExists, boolean isExternal, - String location) throws HCatException { - - Table hiveTable = getHiveTableLike(checkDB(dbName), existingTblName, - newTableName, ifNotExists, location); - if (hiveTable != null) { - try { - hmsClient.createTable(hiveTable); - } catch (AlreadyExistsException e) { - if (!ifNotExists) { - throw new HCatException( - "A table already exists with the name " - + newTableName, e); - } - } catch (InvalidObjectException e) { - throw new HCatException( - "InvalidObjectException in create table like command.", - e); - } catch (MetaException e) { - throw new HCatException( - "MetaException in create table like command.", e); - } catch 
(NoSuchObjectException e) { - throw new ObjectNotFoundException( - "NoSuchObjectException in create table like command.", - e); - } catch (TException e) { - throw new ConnectionFailureException( - "TException in create table like command.", e); - } - } - } - - @Override - public void dropTable(String dbName, String tableName, boolean ifExists) - throws HCatException { - try { - hmsClient.dropTable(checkDB(dbName), tableName, true, ifExists); - } catch (NoSuchObjectException e) { - if (!ifExists) { - throw new ObjectNotFoundException( - "NoSuchObjectException while dropping table.", e); - } - } catch (MetaException e) { - throw new HCatException("MetaException while dropping table.", e); - } catch (TException e) { - throw new ConnectionFailureException( - "TException while dropping table.", e); - } - } - - @Override - public void renameTable(String dbName, String oldName, String newName) - throws HCatException { - Table tbl; - try { - Table oldtbl = hmsClient.getTable(checkDB(dbName), oldName); - if (oldtbl != null) { - // TODO : Should be moved out. 
- if (oldtbl - .getParameters() - .get(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE) != null) { - throw new HCatException( - "Cannot use rename command on a non-native table"); - } - tbl = new Table(oldtbl); - tbl.setTableName(newName); - hmsClient.alter_table(checkDB(dbName), oldName, tbl); - } - } catch (MetaException e) { - throw new HCatException("MetaException while renaming table", e); - } catch (NoSuchObjectException e) { - throw new ObjectNotFoundException( - "NoSuchObjectException while renaming table", e); - } catch (InvalidOperationException e) { - throw new HCatException( - "InvalidOperationException while renaming table", e); - } catch (TException e) { - throw new ConnectionFailureException( - "TException while renaming table", e); - } - } - - @Override - public List getPartitions(String dbName, String tblName) - throws HCatException { - List hcatPtns = new ArrayList(); - try { - List hivePtns = hmsClient.listPartitions( - checkDB(dbName), tblName, (short) -1); - for (Partition ptn : hivePtns) { - hcatPtns.add(new HCatPartition(ptn)); - } - } catch (NoSuchObjectException e) { - throw new ObjectNotFoundException( - "NoSuchObjectException while retrieving partition.", e); - } catch (MetaException e) { - throw new HCatException( - "MetaException while retrieving partition.", e); - } catch (TException e) { - throw new ConnectionFailureException( - "TException while retrieving partition.", e); - } - return hcatPtns; - } - - @Override - public List getPartitions(String dbName, String tblName, Map partitionSpec) throws HCatException { - return listPartitionsByFilter(dbName, tblName, getFilterString(partitionSpec)); - } - - private static String getFilterString(Map partitionSpec) { - final String AND = " AND "; - - StringBuilder filter = new StringBuilder(); - for (Map.Entry entry : partitionSpec.entrySet()) { - filter.append(entry.getKey()).append("=").append("\"").append(entry.getValue()).append("\"").append(AND); - } - - 
int length = filter.toString().length(); - if (length > 0) - filter.delete(length - AND.length(), length); - - return filter.toString(); - } - - @Override - public HCatPartition getPartition(String dbName, String tableName, - Map partitionSpec) throws HCatException { - HCatPartition partition = null; - try { - List partitionColumns = getTable(checkDB(dbName), tableName).getPartCols(); - if (partitionColumns.size() != partitionSpec.size()) { - throw new HCatException("Partition-spec doesn't have the right number of partition keys."); - } - - ArrayList ptnValues = new ArrayList(); - for (HCatFieldSchema partitionColumn : partitionColumns) { - String partKey = partitionColumn.getName(); - if (partitionSpec.containsKey(partKey)) { - ptnValues.add(partitionSpec.get(partKey)); // Partition-keys added in order. - } - else { - throw new HCatException("Invalid partition-key specified: " + partKey); - } - } - Partition hivePartition = hmsClient.getPartition(checkDB(dbName), - tableName, ptnValues); - if (hivePartition != null) { - partition = new HCatPartition(hivePartition); - } - } catch (MetaException e) { - throw new HCatException( - "MetaException while retrieving partition.", e); - } catch (NoSuchObjectException e) { - throw new ObjectNotFoundException( - "NoSuchObjectException while retrieving partition.", e); - } catch (TException e) { - throw new ConnectionFailureException( - "TException while retrieving partition.", e); - } - return partition; - } - - @Override - public void addPartition(HCatAddPartitionDesc partInfo) - throws HCatException { - Table tbl = null; - try { - tbl = hmsClient.getTable(partInfo.getDatabaseName(), - partInfo.getTableName()); - // TODO: Should be moved out. 
- if (tbl.getPartitionKeysSize() == 0) { - throw new HCatException("The table " + partInfo.getTableName() - + " is not partitioned."); - } - - hmsClient.add_partition(partInfo.toHivePartition(tbl)); - } catch (InvalidObjectException e) { - throw new HCatException( - "InvalidObjectException while adding partition.", e); - } catch (AlreadyExistsException e) { - throw new HCatException( - "AlreadyExistsException while adding partition.", e); - } catch (MetaException e) { - throw new HCatException("MetaException while adding partition.", e); - } catch (NoSuchObjectException e) { - throw new ObjectNotFoundException("The table " + partInfo.getTableName() - + " is could not be found.", e); - } catch (TException e) { - throw new ConnectionFailureException( - "TException while adding partition.", e); - } - } - - @Override - public void dropPartitions(String dbName, String tableName, - Map partitionSpec, boolean ifExists) - throws HCatException { - try { - dbName = checkDB(dbName); - List partitions = hmsClient.listPartitionsByFilter(dbName, tableName, - getFilterString(partitionSpec), (short)-1); - - for (Partition partition : partitions) { - dropPartition(partition, ifExists); - } - - } catch (NoSuchObjectException e) { - throw new ObjectNotFoundException( - "NoSuchObjectException while dropping partition. 
" + - "Either db(" + dbName + ") or table(" + tableName + ") missing.", e); - } catch (MetaException e) { - throw new HCatException("MetaException while dropping partition.", - e); - } catch (TException e) { - throw new ConnectionFailureException( - "TException while dropping partition.", e); - } - } - - private void dropPartition(Partition partition, boolean ifExists) - throws HCatException, MetaException, TException { - try { - hmsClient.dropPartition(partition.getDbName(), partition.getTableName(), partition.getValues()); - } catch (NoSuchObjectException e) { - if (!ifExists) { - throw new ObjectNotFoundException( - "NoSuchObjectException while dropping partition: " + partition.getValues(), e); - } - } - } - - @Override - public List listPartitionsByFilter(String dbName, - String tblName, String filter) throws HCatException { - List hcatPtns = new ArrayList(); - try { - List hivePtns = hmsClient.listPartitionsByFilter( - checkDB(dbName), tblName, filter, (short) -1); - for (Partition ptn : hivePtns) { - hcatPtns.add(new HCatPartition(ptn)); - } - } catch (MetaException e) { - throw new HCatException("MetaException while fetching partitions.", - e); - } catch (NoSuchObjectException e) { - throw new ObjectNotFoundException( - "NoSuchObjectException while fetching partitions.", e); - } catch (TException e) { - throw new ConnectionFailureException( - "TException while fetching partitions.", e); - } - return hcatPtns; - } - - @Override - public void markPartitionForEvent(String dbName, String tblName, - Map partKVs, PartitionEventType eventType) - throws HCatException { - try { - hmsClient.markPartitionForEvent(checkDB(dbName), tblName, partKVs, - eventType); - } catch (MetaException e) { - throw new HCatException( - "MetaException while marking partition for event.", e); - } catch (NoSuchObjectException e) { - throw new ObjectNotFoundException( - "NoSuchObjectException while marking partition for event.", - e); - } catch (UnknownTableException e) { - throw new 
HCatException( - "UnknownTableException while marking partition for event.", - e); - } catch (UnknownDBException e) { - throw new HCatException( - "UnknownDBException while marking partition for event.", e); - } catch (TException e) { - throw new ConnectionFailureException( - "TException while marking partition for event.", e); - } - } - - @Override - public boolean isPartitionMarkedForEvent(String dbName, String tblName, - Map partKVs, PartitionEventType eventType) - throws HCatException { - boolean isMarked = false; - try { - isMarked = hmsClient.isPartitionMarkedForEvent(checkDB(dbName), - tblName, partKVs, eventType); - } catch (MetaException e) { - throw new HCatException( - "MetaException while checking partition for event.", e); - } catch (NoSuchObjectException e) { - throw new ObjectNotFoundException( - "NoSuchObjectException while checking partition for event.", - e); - } catch (UnknownTableException e) { - throw new HCatException( - "UnknownTableException while checking partition for event.", - e); - } catch (UnknownDBException e) { - throw new HCatException( - "UnknownDBException while checking partition for event.", e); - } catch (TException e) { - throw new ConnectionFailureException( - "TException while checking partition for event.", e); - } - return isMarked; - } - - @Override - public String getDelegationToken(String owner, - String renewerKerberosPrincipalName) throws HCatException { - String token = null; - try { - token = hmsClient.getDelegationToken(owner, - renewerKerberosPrincipalName); - } catch (MetaException e) { - throw new HCatException( - "MetaException while getting delegation token.", e); - } catch (TException e) { - throw new ConnectionFailureException( - "TException while getting delegation token.", e); - } - - return token; - } - - @Override - public long renewDelegationToken(String tokenStrForm) throws HCatException { - long time = 0; - try { - time = hmsClient.renewDelegationToken(tokenStrForm); - } catch (MetaException e) { - 
throw new HCatException( - "MetaException while renewing delegation token.", e); - } catch (TException e) { - throw new ConnectionFailureException( - "TException while renewing delegation token.", e); - } - - return time; - } - - @Override - public void cancelDelegationToken(String tokenStrForm) - throws HCatException { - try { - hmsClient.cancelDelegationToken(tokenStrForm); - } catch (MetaException e) { - throw new HCatException( - "MetaException while canceling delegation token.", e); - } catch (TException e) { - throw new ConnectionFailureException( - "TException while canceling delegation token.", e); - } - } - - /* - * @param conf /* @throws HCatException,ConnectionFailureException - * - * @see - * org.apache.hcatalog.api.HCatClient#initialize(org.apache.hadoop.conf. - * Configuration) - */ - @Override - void initialize(Configuration conf) throws HCatException { - this.config = conf; - try { - hiveConfig = HCatUtil.getHiveConf(config); - hmsClient = HCatUtil.getHiveClient(hiveConfig); - } catch (MetaException exp) { - throw new HCatException("MetaException while creating HMS client", - exp); - } catch (IOException exp) { - throw new HCatException("IOException while creating HMS client", - exp); - } - - } - - private Table getHiveTableLike(String dbName, String existingTblName, - String newTableName, boolean isExternal, String location) - throws HCatException { - Table oldtbl = null; - Table newTable = null; - try { - oldtbl = hmsClient.getTable(checkDB(dbName), existingTblName); - } catch (MetaException e1) { - throw new HCatException( - "MetaException while retrieving existing table.", e1); - } catch (NoSuchObjectException e1) { - throw new ObjectNotFoundException( - "NoSuchObjectException while retrieving existing table.", - e1); - } catch (TException e1) { - throw new ConnectionFailureException( - "TException while retrieving existing table.", e1); - } - if (oldtbl != null) { - newTable = new Table(); - newTable.setTableName(newTableName); - 
newTable.setDbName(dbName); - StorageDescriptor sd = new StorageDescriptor(oldtbl.getSd()); - newTable.setSd(sd); - newTable.setParameters(oldtbl.getParameters()); - if (location == null) { - newTable.getSd().setLocation(oldtbl.getSd().getLocation()); - } else { - newTable.getSd().setLocation(location); - } - if (isExternal) { - newTable.putToParameters("EXTERNAL", "TRUE"); - newTable.setTableType(TableType.EXTERNAL_TABLE.toString()); - } else { - newTable.getParameters().remove("EXTERNAL"); - } - // set create time - newTable.setCreateTime((int) (System.currentTimeMillis() / 1000)); - newTable.setLastAccessTimeIsSet(false); - } - return newTable; - } - - /* - * @throws HCatException - * - * @see org.apache.hcatalog.api.HCatClient#closeClient() - */ - @Override - public void close() throws HCatException { - hmsClient.close(); - } - - private String checkDB(String name) { - if (StringUtils.isEmpty(name)) { - return MetaStoreUtils.DEFAULT_DATABASE_NAME; - } else { - return name; - } - } - - /* - * @param partInfoList - * @return The size of the list of partitions. 
- * @throws HCatException,ConnectionFailureException - * @see org.apache.hcatalog.api.HCatClient#addPartitions(java.util.List) - */ - @Override - public int addPartitions(List partInfoList) - throws HCatException { - int numPartitions = -1; - if ((partInfoList == null) || (partInfoList.size() == 0)) { - throw new HCatException("The partition list is null or empty."); - } - - Table tbl = null; - try { - tbl = hmsClient.getTable(partInfoList.get(0).getDatabaseName(), - partInfoList.get(0).getTableName()); - ArrayList ptnList = new ArrayList(); - for (HCatAddPartitionDesc desc : partInfoList) { - ptnList.add(desc.toHivePartition(tbl)); - } - numPartitions = hmsClient.add_partitions(ptnList); - } catch (InvalidObjectException e) { - throw new HCatException( - "InvalidObjectException while adding partition.", e); - } catch (AlreadyExistsException e) { - throw new HCatException( - "AlreadyExistsException while adding partition.", e); - } catch (MetaException e) { - throw new HCatException("MetaException while adding partition.", e); - } catch (NoSuchObjectException e) { - throw new ObjectNotFoundException("The table " - + partInfoList.get(0).getTableName() - + " is could not be found.", e); - } catch (TException e) { - throw new ConnectionFailureException( - "TException while adding partition.", e); - } - return numPartitions; - } - - @Override - public String getMessageBusTopicName(String dbName, String tableName) throws HCatException { - try { - return hmsClient.getTable(dbName, tableName).getParameters().get(HCatConstants.HCAT_MSGBUS_TOPIC_NAME); - } - catch (MetaException e) { - throw new HCatException("MetaException while retrieving JMS Topic name.", e); - } catch (NoSuchObjectException e) { - throw new HCatException("Could not find DB:" + dbName + " or Table:" + tableName, e); - } catch (TException e) { - throw new ConnectionFailureException( - "TException while retrieving JMS Topic name.", e); - } - } -} diff --git 
hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/HCatCreateDBDesc.java hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/HCatCreateDBDesc.java deleted file mode 100644 index c22e542..0000000 --- hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/HCatCreateDBDesc.java +++ /dev/null @@ -1,194 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.api; - -import java.util.Map; - -import org.apache.hadoop.hive.metastore.api.Database; -import org.apache.hcatalog.common.HCatException; - -/** - * The Class HCatCreateDBDesc for defining database attributes. - */ -public class HCatCreateDBDesc { - - private String dbName; - private String locationUri; - private String comment; - private Map dbProperties; - private boolean ifNotExits = false; - - /** - * Gets the database properties. - * - * @return the database properties - */ - public Map getDatabaseProperties() { - return this.dbProperties; - } - - /** - * Gets the if not exists. - * - * @return the if not exists - */ - public boolean getIfNotExists() { - return this.ifNotExits; - } - - /** - * Gets the comments. 
- * - * @return the comments - */ - public String getComments() { - return this.comment; - } - - /** - * Gets the location. - * - * @return the location - */ - public String getLocation() { - return this.locationUri; - } - - /** - * Gets the database name. - * - * @return the database name - */ - public String getDatabaseName() { - return this.dbName; - } - - private HCatCreateDBDesc(String dbName) { - this.dbName = dbName; - } - - @Override - public String toString() { - return "HCatCreateDBDesc [" - + (dbName != null ? "dbName=" + dbName + ", " : "dbName=null") - + (locationUri != null ? "location=" + locationUri + ", " - : "location=null") - + (comment != null ? "comment=" + comment + ", " : "comment=null") - + (dbProperties != null ? "dbProperties=" + dbProperties + ", " - : "dbProperties=null") + "ifNotExits=" + ifNotExits + "]"; - } - - /** - * Creates the builder for defining attributes. - * - * @param dbName the db name - * @return the builder - */ - public static Builder create(String dbName) { - return new Builder(dbName); - } - - Database toHiveDb() { - Database hiveDB = new Database(); - hiveDB.setDescription(this.comment); - hiveDB.setLocationUri(this.locationUri); - hiveDB.setName(this.dbName); - hiveDB.setParameters(this.dbProperties); - return hiveDB; - } - - public static class Builder { - - private String innerLoc; - private String innerComment; - private Map innerDBProps; - private String dbName; - private boolean ifNotExists = false; - - private Builder(String dbName) { - this.dbName = dbName; - } - - /** - * Location. - * - * @param value the location of the database. - * @return the builder - */ - public Builder location(String value) { - this.innerLoc = value; - return this; - } - - /** - * Comment. - * - * @param value comments. - * @return the builder - */ - public Builder comment(String value) { - this.innerComment = value; - return this; - } - - /** - * If not exists. 
- * @param ifNotExists If set to true, hive will not throw exception, if a - * database with the same name already exists. - * @return the builder - */ - public Builder ifNotExists(boolean ifNotExists) { - this.ifNotExists = ifNotExists; - return this; - } - - /** - * Database properties. - * - * @param dbProps the database properties - * @return the builder - */ - public Builder databaseProperties(Map dbProps) { - this.innerDBProps = dbProps; - return this; - } - - - /** - * Builds the create database descriptor. - * - * @return An instance of HCatCreateDBDesc - * @throws HCatException - */ - public HCatCreateDBDesc build() throws HCatException { - if (this.dbName == null) { - throw new HCatException("Database name cannot be null."); - } - HCatCreateDBDesc desc = new HCatCreateDBDesc(this.dbName); - desc.comment = this.innerComment; - desc.locationUri = this.innerLoc; - desc.dbProperties = this.innerDBProps; - desc.ifNotExits = this.ifNotExists; - return desc; - - } - - } - -} diff --git hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/HCatCreateTableDesc.java hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/HCatCreateTableDesc.java deleted file mode 100644 index 6059635..0000000 --- hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/HCatCreateTableDesc.java +++ /dev/null @@ -1,520 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.api; - -import java.util.ArrayList; -import java.util.List; -import java.util.Map; - -import org.apache.commons.lang.StringUtils; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.MetaStoreUtils; -import org.apache.hadoop.hive.metastore.TableType; -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.metastore.api.Order; -import org.apache.hadoop.hive.metastore.api.SerDeInfo; -import org.apache.hadoop.hive.metastore.api.StorageDescriptor; -import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat; -import org.apache.hadoop.hive.ql.io.RCFileInputFormat; -import org.apache.hadoop.hive.ql.io.RCFileOutputFormat; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler; -import org.apache.hadoop.hive.ql.metadata.HiveUtils; -import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe; -import org.apache.hadoop.mapred.SequenceFileInputFormat; -import org.apache.hadoop.mapred.SequenceFileOutputFormat; -import org.apache.hadoop.mapred.TextInputFormat; -import org.apache.hcatalog.common.HCatException; -import org.apache.hcatalog.data.schema.HCatFieldSchema; -import org.apache.hcatalog.data.schema.HCatSchemaUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * The Class HCatCreateTableDesc for defining attributes for a new table. 
- */ -@SuppressWarnings("deprecation") -public class HCatCreateTableDesc { - - private static final Logger LOG = LoggerFactory.getLogger(HCatCreateTableDesc.class); - - private String tableName; - private String dbName; - private boolean isExternal; - private String comment; - private String location; - private List cols; - private List partCols; - private List bucketCols; - private int numBuckets; - private List sortCols; - private Map tblProps; - private boolean ifNotExists; - private String fileFormat; - private String inputformat; - private String outputformat; - private String serde; - private String storageHandler; - - private HCatCreateTableDesc(String dbName, String tableName, List columns) { - this.dbName = dbName; - this.tableName = tableName; - this.cols = columns; - } - - /** - * Creates a builder for defining attributes. - * - * @param dbName the db name - * @param tableName the table name - * @param columns the columns - * @return the builder - */ - public static Builder create(String dbName, String tableName, List columns) { - return new Builder(dbName, tableName, columns); - } - - Table toHiveTable(HiveConf conf) throws HCatException { - - Table newTable = new Table(); - newTable.setDbName(dbName); - newTable.setTableName(tableName); - if (tblProps != null) { - newTable.setParameters(tblProps); - } - - if (isExternal) { - newTable.putToParameters("EXTERNAL", "TRUE"); - newTable.setTableType(TableType.EXTERNAL_TABLE.toString()); - } else { - newTable.setTableType(TableType.MANAGED_TABLE.toString()); - } - - StorageDescriptor sd = new StorageDescriptor(); - sd.setSerdeInfo(new SerDeInfo()); - if (location != null) { - sd.setLocation(location); - } - if (this.comment != null) { - newTable.putToParameters("comment", comment); - } - if (!StringUtils.isEmpty(fileFormat)) { - sd.setInputFormat(inputformat); - sd.setOutputFormat(outputformat); - if (serde != null) { - sd.getSerdeInfo().setSerializationLib(serde); - } else { - LOG.info("Using LazySimpleSerDe 
for table " + tableName); - sd.getSerdeInfo() - .setSerializationLib( - org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.class - .getName()); - } - } else { - try { - LOG.info("Creating instance of storage handler to get input/output, serder info."); - HiveStorageHandler sh = HiveUtils.getStorageHandler(conf, - storageHandler); - sd.setInputFormat(sh.getInputFormatClass().getName()); - sd.setOutputFormat(sh.getOutputFormatClass().getName()); - sd.getSerdeInfo().setSerializationLib( - sh.getSerDeClass().getName()); - newTable.putToParameters( - org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE, - storageHandler); - } catch (HiveException e) { - throw new HCatException( - "Exception while creating instance of storage handler", - e); - } - } - newTable.setSd(sd); - if (this.partCols != null) { - ArrayList hivePtnCols = new ArrayList(); - for (HCatFieldSchema fs : this.partCols) { - hivePtnCols.add(HCatSchemaUtils.getFieldSchema(fs)); - } - newTable.setPartitionKeys(hivePtnCols); - } - - if (this.cols != null) { - ArrayList hiveTblCols = new ArrayList(); - for (HCatFieldSchema fs : this.cols) { - hiveTblCols.add(HCatSchemaUtils.getFieldSchema(fs)); - } - newTable.getSd().setCols(hiveTblCols); - } - - if (this.bucketCols != null) { - newTable.getSd().setBucketCols(bucketCols); - newTable.getSd().setNumBuckets(numBuckets); - } - - if (this.sortCols != null) { - newTable.getSd().setSortCols(sortCols); - } - - newTable.setCreateTime((int) (System.currentTimeMillis() / 1000)); - newTable.setLastAccessTimeIsSet(false); - return newTable; - } - - /** - * Gets the if not exists. - * - * @return the if not exists - */ - public boolean getIfNotExists() { - return this.ifNotExists; - } - - /** - * Gets the table name. - * - * @return the table name - */ - public String getTableName() { - return this.tableName; - } - - /** - * Gets the cols. 
- * - * @return the cols - */ - public List getCols() { - return this.cols; - } - - /** - * Gets the partition cols. - * - * @return the partition cols - */ - public List getPartitionCols() { - return this.partCols; - } - - /** - * Gets the bucket cols. - * - * @return the bucket cols - */ - public List getBucketCols() { - return this.bucketCols; - } - - public int getNumBuckets() { - return this.numBuckets; - } - - /** - * Gets the comments. - * - * @return the comments - */ - public String getComments() { - return this.comment; - } - - /** - * Gets the storage handler. - * - * @return the storage handler - */ - public String getStorageHandler() { - return this.storageHandler; - } - - /** - * Gets the location. - * - * @return the location - */ - public String getLocation() { - return this.location; - } - - /** - * Gets the external. - * - * @return the external - */ - public boolean getExternal() { - return this.isExternal; - } - - /** - * Gets the sort cols. - * - * @return the sort cols - */ - public List getSortCols() { - return this.sortCols; - } - - /** - * Gets the tbl props. - * - * @return the tbl props - */ - public Map getTblProps() { - return this.tblProps; - } - - /** - * Gets the file format. - * - * @return the file format - */ - public String getFileFormat() { - return this.fileFormat; - } - - /** - * Gets the database name. - * - * @return the database name - */ - public String getDatabaseName() { - return this.dbName; - } - - @Override - public String toString() { - return "HCatCreateTableDesc [" - + (tableName != null ? "tableName=" + tableName + ", " : "tableName=null") - + (dbName != null ? "dbName=" + dbName + ", " : "dbName=null") - + "isExternal=" - + isExternal - + ", " - + (comment != null ? "comment=" + comment + ", " : "comment=null") - + (location != null ? "location=" + location + ", " : "location=null") - + (cols != null ? "cols=" + cols + ", " : "cols=null") - + (partCols != null ? 
"partCols=" + partCols + ", " : "partCols=null") - + (bucketCols != null ? "bucketCols=" + bucketCols + ", " : "bucketCols=null") - + "numBuckets=" - + numBuckets - + ", " - + (sortCols != null ? "sortCols=" + sortCols + ", " : "sortCols=null") - + (tblProps != null ? "tblProps=" + tblProps + ", " : "tblProps=null") - + "ifNotExists=" - + ifNotExists - + ", " - + (fileFormat != null ? "fileFormat=" + fileFormat + ", " : "fileFormat=null") - + (inputformat != null ? "inputformat=" + inputformat + ", " - : "inputformat=null") - + (outputformat != null ? "outputformat=" + outputformat + ", " - : "outputformat=null") - + (serde != null ? "serde=" + serde + ", " : "serde=null") - + (storageHandler != null ? "storageHandler=" + storageHandler - : "storageHandler=null") + "]"; - } - - public static class Builder { - - private String tableName; - private boolean isExternal; - private List cols; - private List partCols; - private List bucketCols; - private List sortCols; - private int numBuckets; - private String comment; - private String fileFormat; - private String location; - private String storageHandler; - private Map tblProps; - private boolean ifNotExists; - private String dbName; - - - private Builder(String dbName, String tableName, List columns) { - this.dbName = dbName; - this.tableName = tableName; - this.cols = columns; - } - - - /** - * If not exists. - * - * @param ifNotExists If set to true, hive will not throw exception, if a - * table with the same name already exists. - * @return the builder - */ - public Builder ifNotExists(boolean ifNotExists) { - this.ifNotExists = ifNotExists; - return this; - } - - - /** - * Partition cols. - * - * @param partCols the partition cols - * @return the builder - */ - public Builder partCols(List partCols) { - this.partCols = partCols; - return this; - } - - - /** - * Bucket cols. 
- * - * @param bucketCols the bucket cols - * @return the builder - */ - public Builder bucketCols(List bucketCols, int buckets) { - this.bucketCols = bucketCols; - this.numBuckets = buckets; - return this; - } - - /** - * Storage handler. - * - * @param storageHandler the storage handler - * @return the builder - */ - public Builder storageHandler(String storageHandler) { - this.storageHandler = storageHandler; - return this; - } - - /** - * Location. - * - * @param location the location - * @return the builder - */ - public Builder location(String location) { - this.location = location; - return this; - } - - /** - * Comments. - * - * @param comment the comment - * @return the builder - */ - public Builder comments(String comment) { - this.comment = comment; - return this; - } - - /** - * Checks if is table external. - * - * @param isExternal the is external - * @return the builder - */ - public Builder isTableExternal(boolean isExternal) { - this.isExternal = isExternal; - return this; - } - - /** - * Sort cols. - * - * @param sortCols the sort cols - * @return the builder - */ - public Builder sortCols(ArrayList sortCols) { - this.sortCols = sortCols; - return this; - } - - /** - * Tbl props. - * - * @param tblProps the tbl props - * @return the builder - */ - public Builder tblProps(Map tblProps) { - this.tblProps = tblProps; - return this; - } - - /** - * File format. - * - * @param format the format - * @return the builder - */ - public Builder fileFormat(String format) { - this.fileFormat = format; - return this; - } - - /** - * Builds the HCatCreateTableDesc. - * - * @return HCatCreateTableDesc - * @throws HCatException - */ - public HCatCreateTableDesc build() throws HCatException { - if (this.dbName == null) { - LOG.info("Database name found null. 
Setting db to :" - + MetaStoreUtils.DEFAULT_DATABASE_NAME); - this.dbName = MetaStoreUtils.DEFAULT_DATABASE_NAME; - } - HCatCreateTableDesc desc = new HCatCreateTableDesc(this.dbName, - this.tableName, this.cols); - desc.ifNotExists = this.ifNotExists; - desc.isExternal = this.isExternal; - desc.comment = this.comment; - desc.partCols = this.partCols; - desc.bucketCols = this.bucketCols; - desc.numBuckets = this.numBuckets; - desc.location = this.location; - desc.tblProps = this.tblProps; - desc.sortCols = this.sortCols; - desc.serde = null; - if (!StringUtils.isEmpty(fileFormat)) { - desc.fileFormat = fileFormat; - if ("SequenceFile".equalsIgnoreCase(fileFormat)) { - desc.inputformat = SequenceFileInputFormat.class.getName(); - desc.outputformat = SequenceFileOutputFormat.class - .getName(); - } else if ("RCFile".equalsIgnoreCase(fileFormat)) { - desc.inputformat = RCFileInputFormat.class.getName(); - desc.outputformat = RCFileOutputFormat.class.getName(); - desc.serde = ColumnarSerDe.class.getName(); - } - desc.storageHandler = StringUtils.EMPTY; - } else if (!StringUtils.isEmpty(storageHandler)) { - desc.storageHandler = storageHandler; - } else { - desc.fileFormat = "TextFile"; - LOG.info("Using text file format for the table."); - desc.inputformat = TextInputFormat.class.getName(); - LOG.info("Table input format:" + desc.inputformat); - desc.outputformat = IgnoreKeyTextOutputFormat.class - .getName(); - LOG.info("Table output format:" + desc.outputformat); - } - return desc; - } - } -} diff --git hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/HCatDatabase.java hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/HCatDatabase.java deleted file mode 100644 index d9d9ca8..0000000 --- hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/HCatDatabase.java +++ /dev/null @@ -1,88 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.api; - -import java.util.Map; - -import org.apache.hadoop.hive.metastore.api.Database; - - -/** - * HCatDatabase is wrapper class around org.apache.hadoop.hive.metastore.api.Database. - */ -public class HCatDatabase { - - private String dbName; - private String dbLocation; - private String comment; - private Map props; - - HCatDatabase(Database db) { - this.dbName = db.getName(); - this.props = db.getParameters(); - this.dbLocation = db.getLocationUri(); - this.comment = db.getDescription(); - } - - /** - * Gets the database name. - * - * @return the database name - */ - public String getName() { - return dbName; - } - - /** - * Gets the dB location. - * - * @return the dB location - */ - public String getLocation() { - return dbLocation; - } - - /** - * Gets the comment. - * - * @return the comment - */ - public String getComment() { - return comment; - } - - /** - * Gets the dB properties. - * - * @return the dB properties - */ - public Map getProperties() { - return props; - } - - @Override - public String toString() { - return "HCatDatabase [" - + (dbName != null ? "dbName=" + dbName + ", " : "dbName=null") - + (dbLocation != null ? "dbLocation=" + dbLocation + ", " : "dbLocation=null") - + (comment != null ? 
"comment=" + comment + ", " : "comment=null") - + (props != null ? "props=" + props : "props=null") + "]"; - } - -} diff --git hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/HCatPartition.java hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/HCatPartition.java deleted file mode 100644 index 8040931..0000000 --- hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/HCatPartition.java +++ /dev/null @@ -1,204 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.api; - -import java.util.ArrayList; -import java.util.List; -import java.util.Map; - -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.metastore.api.Order; -import org.apache.hadoop.hive.metastore.api.Partition; -import org.apache.hadoop.hive.metastore.api.StorageDescriptor; -import org.apache.hcatalog.common.HCatException; -import org.apache.hcatalog.data.schema.HCatFieldSchema; -import org.apache.hcatalog.data.schema.HCatSchemaUtils; - -/** - * The HCatPartition is a wrapper around org.apache.hadoop.hive.metastore.api.Partition. 
- */ -public class HCatPartition { - - private String tableName; - private String dbName; - private List values; - private List tableCols; - private int createTime; - private int lastAccessTime; - private StorageDescriptor sd; - private Map parameters; - - HCatPartition(Partition partition) throws HCatException { - this.tableName = partition.getTableName(); - this.dbName = partition.getDbName(); - this.createTime = partition.getCreateTime(); - this.lastAccessTime = partition.getLastAccessTime(); - this.parameters = partition.getParameters(); - this.values = partition.getValues(); - this.sd = partition.getSd(); - this.tableCols = new ArrayList(); - for (FieldSchema fs : this.sd.getCols()) { - this.tableCols.add(HCatSchemaUtils.getHCatFieldSchema(fs)); - } - } - - /** - * Gets the table name. - * - * @return the table name - */ - public String getTableName() { - return this.tableName; - } - - /** - * Gets the database name. - * - * @return the database name - */ - public String getDatabaseName() { - return this.dbName; - } - - /** - * Gets the columns of the table. - * - * @return the columns - */ - public List getColumns() { - return this.tableCols; - } - - /** - * Gets the input format. - * - * @return the input format - */ - public String getInputFormat() { - return this.sd.getInputFormat(); - } - - /** - * Gets the output format. - * - * @return the output format - */ - public String getOutputFormat() { - return this.sd.getOutputFormat(); - } - - /** - * Gets the storage handler. - * - * @return the storage handler - */ - public String getStorageHandler() { - return this.sd - .getParameters() - .get(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE); - } - - /** - * Gets the location. - * - * @return the location - */ - public String getLocation() { - return this.sd.getLocation(); - } - - /** - * Gets the serde. 
- * - * @return the serde - */ - public String getSerDe() { - return this.sd.getSerdeInfo().getSerializationLib(); - } - - public Map getParameters() { - return this.parameters; - } - - /** - * Gets the last access time. - * - * @return the last access time - */ - public int getLastAccessTime() { - return this.lastAccessTime; - } - - /** - * Gets the creates the time. - * - * @return the creates the time - */ - public int getCreateTime() { - return this.createTime; - } - - /** - * Gets the values. - * - * @return the values - */ - public List getValues() { - return this.values; - } - - /** - * Gets the bucket columns. - * - * @return the bucket columns - */ - public List getBucketCols() { - return this.sd.getBucketCols(); - } - - /** - * Gets the number of buckets. - * - * @return the number of buckets - */ - public int getNumBuckets() { - return this.sd.getNumBuckets(); - } - - /** - * Gets the sort columns. - * - * @return the sort columns - */ - public List getSortCols() { - return this.sd.getSortCols(); - } - - @Override - public String toString() { - return "HCatPartition [" - + (tableName != null ? "tableName=" + tableName + ", " : "tableName=null") - + (dbName != null ? "dbName=" + dbName + ", " : "dbName=null") - + (values != null ? "values=" + values + ", " : "values=null") - + "createTime=" + createTime + ", lastAccessTime=" - + lastAccessTime + ", " + (sd != null ? "sd=" + sd + ", " : "sd=null") - + (parameters != null ? "parameters=" + parameters : "parameters=null") + "]"; - } - -} diff --git hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/HCatTable.java hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/HCatTable.java deleted file mode 100644 index 6402eb7..0000000 --- hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/HCatTable.java +++ /dev/null @@ -1,227 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.api; - -import java.util.ArrayList; -import java.util.List; -import java.util.Map; - -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.metastore.api.Order; -import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.hcatalog.common.HCatException; -import org.apache.hcatalog.data.schema.HCatFieldSchema; -import org.apache.hcatalog.data.schema.HCatSchemaUtils; - -/** - * The HCatTable is a wrapper around org.apache.hadoop.hive.metastore.api.Table. 
- */ -public class HCatTable { - - private String tableName; - private String tabletype; - private List cols; - private List partCols; - private List bucketCols; - private List sortCols; - private int numBuckets; - private String inputFileFormat; - private String outputFileFormat; - private String storageHandler; - private Map tblProps; - private String dbName; - private String serde; - private String location; - - HCatTable(Table hiveTable) throws HCatException { - this.tableName = hiveTable.getTableName(); - this.dbName = hiveTable.getDbName(); - this.tabletype = hiveTable.getTableType(); - cols = new ArrayList(); - for (FieldSchema colFS : hiveTable.getSd().getCols()) { - cols.add(HCatSchemaUtils.getHCatFieldSchema(colFS)); - } - partCols = new ArrayList(); - for (FieldSchema colFS : hiveTable.getPartitionKeys()) { - partCols.add(HCatSchemaUtils.getHCatFieldSchema(colFS)); - } - bucketCols = hiveTable.getSd().getBucketCols(); - sortCols = hiveTable.getSd().getSortCols(); - numBuckets = hiveTable.getSd().getNumBuckets(); - inputFileFormat = hiveTable.getSd().getInputFormat(); - outputFileFormat = hiveTable.getSd().getOutputFormat(); - storageHandler = hiveTable - .getSd() - .getParameters() - .get(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE); - tblProps = hiveTable.getParameters(); - serde = hiveTable.getSd().getSerdeInfo().getSerializationLib(); - location = hiveTable.getSd().getLocation(); - } - - /** - * Gets the table name. - * - * @return the table name - */ - public String getTableName() { - return tableName; - } - - /** - * Gets the db name. - * - * @return the db name - */ - public String getDbName() { - return dbName; - } - - /** - * Gets the columns. - * - * @return the columns - */ - public List getCols() { - return cols; - } - - /** - * Gets the part columns. - * - * @return the part columns - */ - public List getPartCols() { - return partCols; - } - - /** - * Gets the bucket columns. 
- * - * @return the bucket columns - */ - public List getBucketCols() { - return bucketCols; - } - - /** - * Gets the sort columns. - * - * @return the sort columns - */ - public List getSortCols() { - return sortCols; - } - - /** - * Gets the number of buckets. - * - * @return the number of buckets - */ - public int getNumBuckets() { - return numBuckets; - } - - /** - * Gets the storage handler. - * - * @return the storage handler - */ - public String getStorageHandler() { - return storageHandler; - } - - /** - * Gets the table props. - * - * @return the table props - */ - public Map getTblProps() { - return tblProps; - } - - /** - * Gets the tabletype. - * - * @return the tabletype - */ - public String getTabletype() { - return tabletype; - } - - /** - * Gets the input file format. - * - * @return the input file format - */ - public String getInputFileFormat() { - return inputFileFormat; - } - - /** - * Gets the output file format. - * - * @return the output file format - */ - public String getOutputFileFormat() { - return outputFileFormat; - } - - /** - * Gets the serde lib. - * - * @return the serde lib - */ - public String getSerdeLib() { - return serde; - } - - /** - * Gets the location. - * - * @return the location - */ - public String getLocation() { - return location; - } - - @Override - public String toString() { - return "HCatTable [" - + (tableName != null ? "tableName=" + tableName + ", " : "tableName=null") - + (dbName != null ? "dbName=" + dbName + ", " : "dbName=null") - + (tabletype != null ? "tabletype=" + tabletype + ", " : "tabletype=null") - + (cols != null ? "cols=" + cols + ", " : "cols=null") - + (partCols != null ? "partCols=" + partCols + ", " : "partCols==null") - + (bucketCols != null ? "bucketCols=" + bucketCols + ", " : "bucketCols=null") - + (sortCols != null ? "sortCols=" + sortCols + ", " : "sortCols=null") - + "numBuckets=" - + numBuckets - + ", " - + (inputFileFormat != null ? 
"inputFileFormat=" - + inputFileFormat + ", " : "inputFileFormat=null") - + (outputFileFormat != null ? "outputFileFormat=" - + outputFileFormat + ", " : "outputFileFormat=null") - + (storageHandler != null ? "storageHandler=" + storageHandler - + ", " : "storageHandler=null") - + (tblProps != null ? "tblProps=" + tblProps + ", " : "tblProps=null") - + (serde != null ? "serde=" + serde + ", " : "serde=") - + (location != null ? "location=" + location : "location=") + "]"; - } -} diff --git hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/ObjectNotFoundException.java hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/ObjectNotFoundException.java deleted file mode 100644 index be1892e..0000000 --- hcatalog/webhcat/java-client/src/main/java/org/apache/hcatalog/api/ObjectNotFoundException.java +++ /dev/null @@ -1,39 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.api; - -import org.apache.hcatalog.common.HCatException; - -/** - * This exception is thrown when a Database, Table or Partition - * specified in an HCatalog query is not found. 
- */ -public class ObjectNotFoundException extends HCatException { - - private static final long serialVersionUID = 1L; - - /** - * @param message Exception message. - * @param cause The wrapped Throwable that caused this exception. - */ - public ObjectNotFoundException(String message, Throwable cause) { - super(message, cause); - } -} diff --git hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/ConnectionFailureException.java hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/ConnectionFailureException.java new file mode 100644 index 0000000..4c70bae --- /dev/null +++ hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/ConnectionFailureException.java @@ -0,0 +1,39 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.api; + +import org.apache.hive.hcatalog.common.HCatException; +/** + * Class representing exceptions resulting from connection problems + * between HCat client and server. + */ +public class ConnectionFailureException extends HCatException { + + private static final long serialVersionUID = 1L; + + /** + * @param message Exception message. 
+ * @param cause The wrapped Throwable that caused this exception. + */ + public ConnectionFailureException(String message, Throwable cause) { + super(message, cause); + } + +} diff --git hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatAddPartitionDesc.java hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatAddPartitionDesc.java new file mode 100644 index 0000000..17fc956 --- /dev/null +++ hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatAddPartitionDesc.java @@ -0,0 +1,185 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.hive.hcatalog.api; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.metastore.MetaStoreUtils; +import org.apache.hadoop.hive.metastore.Warehouse; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.Partition; +import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hive.hcatalog.common.HCatException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * The Class HCatAddPartitionDesc helps users in defining partition attributes. + */ +public class HCatAddPartitionDesc { + + private static final Logger LOG = LoggerFactory.getLogger(HCatAddPartitionDesc.class); + private String tableName; + private String dbName; + private String location; + private Map partSpec; + + private HCatAddPartitionDesc(String dbName, String tbl, String loc, Map spec) { + this.dbName = dbName; + this.tableName = tbl; + this.location = loc; + this.partSpec = spec; + } + + /** + * Gets the location. + * + * @return the location + */ + public String getLocation() { + return this.location; + } + + + /** + * Gets the partition spec. + * + * @return the partition spec + */ + public Map getPartitionSpec() { + return this.partSpec; + } + + /** + * Gets the table name. + * + * @return the table name + */ + public String getTableName() { + return this.tableName; + } + + /** + * Gets the database name. + * + * @return the database name + */ + public String getDatabaseName() { + return this.dbName; + } + + @Override + public String toString() { + return "HCatAddPartitionDesc [" + + (tableName != null ? "tableName=" + tableName + ", " : "tableName=null") + + (dbName != null ? "dbName=" + dbName + ", " : "dbName=null") + + (location != null ? 
"location=" + location + ", " : "location=null") + + (partSpec != null ? "partSpec=" + partSpec : "partSpec=null") + "]"; + } + + /** + * Creates the builder for specifying attributes. + * + * @param dbName the db name + * @param tableName the table name + * @param location the location + * @param partSpec the part spec + * @return the builder + * @throws HCatException + */ + public static Builder create(String dbName, String tableName, String location, + Map partSpec) throws HCatException { + return new Builder(dbName, tableName, location, partSpec); + } + + Partition toHivePartition(Table hiveTable) throws HCatException { + Partition hivePtn = new Partition(); + hivePtn.setDbName(this.dbName); + hivePtn.setTableName(this.tableName); + + List pvals = new ArrayList(); + for (FieldSchema field : hiveTable.getPartitionKeys()) { + String val = partSpec.get(field.getName()); + if (val == null || val.length() == 0) { + throw new HCatException("create partition: Value for key " + + field.getName() + " is null or empty"); + } + pvals.add(val); + } + + hivePtn.setValues(pvals); + StorageDescriptor sd = new StorageDescriptor(hiveTable.getSd()); + hivePtn.setSd(sd); + hivePtn.setParameters(hiveTable.getParameters()); + if (this.location != null) { + hivePtn.getSd().setLocation(this.location); + } else { + String partName; + try { + partName = Warehouse.makePartName( + hiveTable.getPartitionKeys(), pvals); + LOG.info("Setting partition location to :" + partName); + } catch (MetaException e) { + throw new HCatException("Exception while creating partition name.", e); + } + Path partPath = new Path(hiveTable.getSd().getLocation(), partName); + hivePtn.getSd().setLocation(partPath.toString()); + } + hivePtn.setCreateTime((int) (System.currentTimeMillis() / 1000)); + hivePtn.setLastAccessTimeIsSet(false); + return hivePtn; + } + + public static class Builder { + + private String tableName; + private String location; + private Map values; + private String dbName; + + private 
Builder(String dbName, String tableName, String location, Map values) { + this.dbName = dbName; + this.tableName = tableName; + this.location = location; + this.values = values; + } + + /** + * Builds the HCatAddPartitionDesc. + * + * @return the h cat add partition desc + * @throws HCatException + */ + public HCatAddPartitionDesc build() throws HCatException { + if (this.dbName == null) { + this.dbName = MetaStoreUtils.DEFAULT_DATABASE_NAME; + } + HCatAddPartitionDesc desc = new HCatAddPartitionDesc( + this.dbName, this.tableName, this.location, + this.values); + return desc; + } + } + +} diff --git hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatClient.java hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatClient.java new file mode 100644 index 0000000..518d342 --- /dev/null +++ hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatClient.java @@ -0,0 +1,361 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.hive.hcatalog.api; + +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.common.JavaUtils; +import org.apache.hadoop.hive.metastore.api.PartitionEventType; +import org.apache.hive.hcatalog.common.HCatException; +import org.apache.hive.hcatalog.data.schema.HCatFieldSchema; + +/** + * The abstract class HCatClient containing APIs for HCatalog DDL commands. + */ +public abstract class HCatClient { + + public enum DropDBMode {RESTRICT, CASCADE} + + public static final String HCAT_CLIENT_IMPL_CLASS = "hcat.client.impl.class"; + + /** + * Creates an instance of HCatClient. + * + * @param conf An instance of configuration. + * @return An instance of HCatClient. + * @throws HCatException + */ + public static HCatClient create(Configuration conf) throws HCatException { + HCatClient client = null; + String className = conf.get(HCAT_CLIENT_IMPL_CLASS, + HCatClientHMSImpl.class.getName()); + try { + Class clientClass = Class.forName(className, + true, JavaUtils.getClassLoader()).asSubclass( + HCatClient.class); + client = (HCatClient) clientClass.newInstance(); + } catch (ClassNotFoundException e) { + throw new HCatException( + "ClassNotFoundException while creating client class.", e); + } catch (InstantiationException e) { + throw new HCatException( + "InstantiationException while creating client class.", e); + } catch (IllegalAccessException e) { + throw new HCatException( + "IllegalAccessException while creating client class.", e); + } + if (client != null) { + client.initialize(conf); + } + return client; + } + + abstract void initialize(Configuration conf) throws HCatException; + + /** + * Get all existing databases that match the given + * pattern. 
The matching occurs as per Java regular expressions + * + * @param pattern java re pattern + * @return list of database names + * @throws HCatException + */ + public abstract List listDatabaseNamesByPattern(String pattern) + throws HCatException; + + /** + * Gets the database. + * + * @param dbName The name of the database. + * @return An instance of HCatDatabaseInfo. + * @throws HCatException + */ + public abstract HCatDatabase getDatabase(String dbName) throws HCatException; + + /** + * Creates the database. + * + * @param dbInfo An instance of HCatCreateDBDesc. + * @throws HCatException + */ + public abstract void createDatabase(HCatCreateDBDesc dbInfo) + throws HCatException; + + /** + * Drops a database. + * + * @param dbName The name of the database to delete. + * @param ifExists Hive returns an error if the database specified does not exist, + * unless ifExists is set to true. + * @param mode This is set to either "restrict" or "cascade". Restrict will + * remove the schema if all the tables are empty. Cascade removes + * everything including data and definitions. + * @throws HCatException + */ + public abstract void dropDatabase(String dbName, boolean ifExists, + DropDBMode mode) throws HCatException; + + /** + * Returns all existing tables from the specified database which match the given + * pattern. The matching occurs as per Java regular expressions. + * @param dbName The name of the DB (to be searched) + * @param tablePattern The regex for the table-name + * @return list of table names + * @throws HCatException + */ + public abstract List listTableNamesByPattern(String dbName, String tablePattern) + throws HCatException; + + /** + * Gets the table. + * + * @param dbName The name of the database. + * @param tableName The name of the table. + * @return An instance of HCatTableInfo. + * @throws HCatException + */ + public abstract HCatTable getTable(String dbName, String tableName) + throws HCatException; + + /** + * Creates the table. 
+ * + * @param createTableDesc An instance of HCatCreateTableDesc class. + * @throws HCatException + */ + public abstract void createTable(HCatCreateTableDesc createTableDesc) throws HCatException; + + /** + * Updates the Table's column schema to the specified definition. + * + * @param dbName The name of the database. + * @param tableName The name of the table. + * @param columnSchema The (new) definition of the column schema (i.e. list of fields). + * + */ + public abstract void updateTableSchema(String dbName, String tableName, List columnSchema) + throws HCatException; + + /** + * Creates the table like an existing table. + * + * @param dbName The name of the database. + * @param existingTblName The name of the existing table. + * @param newTableName The name of the new table. + * @param ifNotExists If true, then error related to already table existing is skipped. + * @param isExternal Set to "true", if table has be created at a different + * location other than default. + * @param location The location for the table. + * @throws HCatException + */ + public abstract void createTableLike(String dbName, String existingTblName, + String newTableName, boolean ifNotExists, boolean isExternal, + String location) throws HCatException; + + /** + * Drop table. + * + * @param dbName The name of the database. + * @param tableName The name of the table. + * @param ifExists Hive returns an error if the database specified does not exist, + * unless ifExists is set to true. + * @throws HCatException + */ + public abstract void dropTable(String dbName, String tableName, + boolean ifExists) throws HCatException; + + /** + * Renames a table. + * + * @param dbName The name of the database. + * @param oldName The name of the table to be renamed. + * @param newName The new name of the table. + * @throws HCatException + */ + public abstract void renameTable(String dbName, String oldName, + String newName) throws HCatException; + + /** + * Gets all the partitions. 
+ * + * @param dbName The name of the database. + * @param tblName The name of the table. + * @return A list of partitions. + * @throws HCatException + */ + public abstract List getPartitions(String dbName, String tblName) + throws HCatException; + + /** + * Gets all the partitions that match the specified (and possibly partial) partition specification. + * A partial partition-specification is one where not all partition-keys have associated values. For example, + * for a table ('myDb.myTable') with 2 partition keys (dt string, region string), + * if for each dt ('20120101', '20120102', etc.) there can exist 3 regions ('us', 'uk', 'in'), then, + * 1. Complete partition spec: getPartitions('myDb', 'myTable', {dt='20120101', region='us'}) would return 1 partition. + * 2. Partial partition spec: getPartitions('myDb', 'myTable', {dt='20120101'}) would return all 3 partitions, + * with dt='20120101' (i.e. region = 'us', 'uk' and 'in'). + * @param dbName The name of the database. + * @param tblName The name of the table. + * @param partitionSpec The partition specification. (Need not include all partition keys.) + * @return A list of partitions. + * @throws HCatException + */ + public abstract List getPartitions(String dbName, String tblName, Map partitionSpec) + throws HCatException; + + /** + * Gets the partition. + * + * @param dbName The database name. + * @param tableName The table name. + * @param partitionSpec The partition specification, {[col_name,value],[col_name2,value2]}. All partition-key-values + * must be specified. + * @return An instance of HCatPartitionInfo. + * @throws HCatException + */ + public abstract HCatPartition getPartition(String dbName, String tableName, + Map partitionSpec) throws HCatException; + + /** + * Adds the partition. + * + * @param partInfo An instance of HCatAddPartitionDesc. 
+ * @throws HCatException + */ + public abstract void addPartition(HCatAddPartitionDesc partInfo) + throws HCatException; + + /** + * Adds a list of partitions. + * + * @param partInfoList A list of HCatAddPartitionDesc. + * @return The number of partitions added. + * @throws HCatException + */ + public abstract int addPartitions(List partInfoList) + throws HCatException; + + /** + * Drops partition(s) that match the specified (and possibly partial) partition specification. + * A partial partition-specification is one where not all partition-keys have associated values. For example, + * for a table ('myDb.myTable') with 2 partition keys (dt string, region string), + * if for each dt ('20120101', '20120102', etc.) there can exist 3 regions ('us', 'uk', 'in'), then, + * 1. Complete partition spec: dropPartitions('myDb', 'myTable', {dt='20120101', region='us'}) would drop 1 partition. + * 2. Partial partition spec: dropPartitions('myDb', 'myTable', {dt='20120101'}) would drop all 3 partitions, + * with dt='20120101' (i.e. region = 'us', 'uk' and 'in'). + * @param dbName The database name. + * @param tableName The table name. + * @param partitionSpec The partition specification, {[col_name,value],[col_name2,value2]}. + * @param ifExists Hive returns an error if the partition specified does not exist, unless ifExists is set to true. + * @throws HCatException,ConnectionFailureException + */ + public abstract void dropPartitions(String dbName, String tableName, + Map partitionSpec, boolean ifExists) + throws HCatException; + + /** + * List partitions by filter. + * + * @param dbName The database name. + * @param tblName The table name. + * @param filter The filter string, + * for example "part1 = \"p1_abc\" and part2 <= "\p2_test\"". Filtering can + * be done only on string partition keys. 
+ * @return list of partitions + * @throws HCatException + */ + public abstract List listPartitionsByFilter(String dbName, String tblName, + String filter) throws HCatException; + + /** + * Mark partition for event. + * + * @param dbName The database name. + * @param tblName The table name. + * @param partKVs the key-values associated with the partition. + * @param eventType the event type + * @throws HCatException + */ + public abstract void markPartitionForEvent(String dbName, String tblName, + Map partKVs, PartitionEventType eventType) + throws HCatException; + + /** + * Checks if a partition is marked for event. + * + * @param dbName the db name + * @param tblName the table name + * @param partKVs the key-values associated with the partition. + * @param eventType the event type + * @return true, if is partition marked for event + * @throws HCatException + */ + public abstract boolean isPartitionMarkedForEvent(String dbName, String tblName, + Map partKVs, PartitionEventType eventType) + throws HCatException; + + /** + * Gets the delegation token. + * + * @param owner the owner + * @param renewerKerberosPrincipalName the renewer kerberos principal name + * @return the delegation token + * @throws HCatException,ConnectionFailureException + */ + public abstract String getDelegationToken(String owner, + String renewerKerberosPrincipalName) throws HCatException; + + /** + * Renew delegation token. + * + * @param tokenStrForm the token string + * @return the new expiration time + * @throws HCatException + */ + public abstract long renewDelegationToken(String tokenStrForm) + throws HCatException; + + /** + * Cancel delegation token. + * + * @param tokenStrForm the token string + * @throws HCatException + */ + public abstract void cancelDelegationToken(String tokenStrForm) + throws HCatException; + + /** + * Retrieve Message-bus topic for a table. + * + * @param dbName The name of the DB. + * @param tableName The name of the table. 
+ * @return Topic-name for the message-bus on which messages will be sent for the specified table. + * By default, this is set to .. Returns null when not set. + */ + public abstract String getMessageBusTopicName(String dbName, String tableName) throws HCatException; + + /** + * Close the hcatalog client. + * + * @throws HCatException + */ + public abstract void close() throws HCatException; +} diff --git hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatClientHMSImpl.java hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatClientHMSImpl.java new file mode 100644 index 0000000..aa5f6ea --- /dev/null +++ hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatClientHMSImpl.java @@ -0,0 +1,723 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.hive.hcatalog.api; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; +import org.apache.hadoop.hive.metastore.MetaStoreUtils; +import org.apache.hadoop.hive.metastore.TableType; +import org.apache.hadoop.hive.metastore.api.AlreadyExistsException; +import org.apache.hadoop.hive.metastore.api.Database; +import org.apache.hadoop.hive.metastore.api.InvalidObjectException; +import org.apache.hadoop.hive.metastore.api.InvalidOperationException; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; +import org.apache.hadoop.hive.metastore.api.Partition; +import org.apache.hadoop.hive.metastore.api.PartitionEventType; +import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.metastore.api.UnknownDBException; +import org.apache.hadoop.hive.metastore.api.UnknownTableException; +import org.apache.hive.hcatalog.common.HCatConstants; +import org.apache.hive.hcatalog.common.HCatException; +import org.apache.hive.hcatalog.common.HCatUtil; +import org.apache.hive.hcatalog.data.schema.HCatFieldSchema; +import org.apache.hive.hcatalog.data.schema.HCatSchemaUtils; +import org.apache.thrift.TException; + +/** + * The HCatClientHMSImpl is the Hive Metastore client based implementation of + * HCatClient. 
+ */ +public class HCatClientHMSImpl extends HCatClient { + + private HiveMetaStoreClient hmsClient; + private Configuration config; + private HiveConf hiveConfig; + + @Override + public List listDatabaseNamesByPattern(String pattern) + throws HCatException { + List dbNames = null; + try { + dbNames = hmsClient.getDatabases(pattern); + } catch (MetaException exp) { + throw new HCatException("MetaException while listing db names", exp); + } + return dbNames; + } + + @Override + public HCatDatabase getDatabase(String dbName) throws HCatException { + HCatDatabase db = null; + try { + Database hiveDB = hmsClient.getDatabase(checkDB(dbName)); + if (hiveDB != null) { + db = new HCatDatabase(hiveDB); + } + } catch (NoSuchObjectException exp) { + throw new ObjectNotFoundException( + "NoSuchObjectException while fetching database", exp); + } catch (MetaException exp) { + throw new HCatException("MetaException while fetching database", + exp); + } catch (TException exp) { + throw new ConnectionFailureException( + "TException while fetching database", exp); + } + return db; + } + + @Override + public void createDatabase(HCatCreateDBDesc dbInfo) throws HCatException { + try { + hmsClient.createDatabase(dbInfo.toHiveDb()); + } catch (AlreadyExistsException exp) { + if (!dbInfo.getIfNotExists()) { + throw new HCatException( + "AlreadyExistsException while creating database", exp); + } + } catch (InvalidObjectException exp) { + throw new HCatException( + "InvalidObjectException while creating database", exp); + } catch (MetaException exp) { + throw new HCatException("MetaException while creating database", + exp); + } catch (TException exp) { + throw new ConnectionFailureException( + "TException while creating database", exp); + } + } + + @Override + public void dropDatabase(String dbName, boolean ifExists, DropDBMode mode) + throws HCatException { + boolean isCascade = mode.toString().equalsIgnoreCase("cascade"); + try { + hmsClient.dropDatabase(checkDB(dbName), true, ifExists, 
isCascade); + } catch (NoSuchObjectException e) { + if (!ifExists) { + throw new ObjectNotFoundException( + "NoSuchObjectException while dropping db.", e); + } + } catch (InvalidOperationException e) { + throw new HCatException( + "InvalidOperationException while dropping db.", e); + } catch (MetaException e) { + throw new HCatException("MetaException while dropping db.", e); + } catch (TException e) { + throw new ConnectionFailureException("TException while dropping db.", + e); + } + } + + @Override + public List listTableNamesByPattern(String dbName, + String tablePattern) throws HCatException { + List tableNames = null; + try { + tableNames = hmsClient.getTables(checkDB(dbName), tablePattern); + } catch (MetaException e) { + throw new HCatException( + "MetaException while fetching table names.", e); + } + return tableNames; + } + + @Override + public HCatTable getTable(String dbName, String tableName) + throws HCatException { + HCatTable table = null; + try { + Table hiveTable = hmsClient.getTable(checkDB(dbName), tableName); + if (hiveTable != null) { + table = new HCatTable(hiveTable); + } + } catch (MetaException e) { + throw new HCatException("MetaException while fetching table.", e); + } catch (NoSuchObjectException e) { + throw new ObjectNotFoundException( + "NoSuchObjectException while fetching table.", e); + } catch (TException e) { + throw new ConnectionFailureException( + "TException while fetching table.", e); + } + return table; + } + + @Override + public void createTable(HCatCreateTableDesc createTableDesc) + throws HCatException { + try { + hmsClient.createTable(createTableDesc.toHiveTable(hiveConfig)); + } catch (AlreadyExistsException e) { + if (!createTableDesc.getIfNotExists()) { + throw new HCatException( + "AlreadyExistsException while creating table.", e); + } + } catch (InvalidObjectException e) { + throw new HCatException( + "InvalidObjectException while creating table.", e); + } catch (MetaException e) { + throw new 
HCatException("MetaException while creating table.", e); + } catch (NoSuchObjectException e) { + throw new ObjectNotFoundException( + "NoSuchObjectException while creating table.", e); + } catch (TException e) { + throw new ConnectionFailureException( + "TException while creating table.", e); + } catch (IOException e) { + throw new HCatException("IOException while creating hive conf.", e); + } + + } + + @Override + public void updateTableSchema(String dbName, String tableName, List columnSchema) + throws HCatException { + try { + Table table = hmsClient.getTable(dbName, tableName); + table.getSd().setCols(HCatSchemaUtils.getFieldSchemas(columnSchema)); + hmsClient.alter_table(dbName, tableName, table); + } + catch (InvalidOperationException e) { + throw new HCatException("InvalidOperationException while updating table schema.", e); + } + catch (MetaException e) { + throw new HCatException("MetaException while updating table schema.", e); + } + catch (NoSuchObjectException e) { + throw new ObjectNotFoundException( + "NoSuchObjectException while updating table schema.", e); + } + catch (TException e) { + throw new ConnectionFailureException( + "TException while updating table schema.", e); + } + } + + @Override + public void createTableLike(String dbName, String existingTblName, + String newTableName, boolean ifNotExists, boolean isExternal, + String location) throws HCatException { + + Table hiveTable = getHiveTableLike(checkDB(dbName), existingTblName, + newTableName, isExternal, location); // forward isExternal; ifNotExists only governs the AlreadyExistsException handling below + if (hiveTable != null) { + try { + hmsClient.createTable(hiveTable); + } catch (AlreadyExistsException e) { + if (!ifNotExists) { + throw new HCatException( + "A table already exists with the name " + + newTableName, e); + } + } catch (InvalidObjectException e) { + throw new HCatException( + "InvalidObjectException in create table like command.", + e); + } catch (MetaException e) { + throw new HCatException( + "MetaException in create table like command.", e); + } catch
(NoSuchObjectException e) { + throw new ObjectNotFoundException( + "NoSuchObjectException in create table like command.", + e); + } catch (TException e) { + throw new ConnectionFailureException( + "TException in create table like command.", e); + } + } + } + + @Override + public void dropTable(String dbName, String tableName, boolean ifExists) + throws HCatException { + try { + hmsClient.dropTable(checkDB(dbName), tableName, true, ifExists); + } catch (NoSuchObjectException e) { + if (!ifExists) { + throw new ObjectNotFoundException( + "NoSuchObjectException while dropping table.", e); + } + } catch (MetaException e) { + throw new HCatException("MetaException while dropping table.", e); + } catch (TException e) { + throw new ConnectionFailureException( + "TException while dropping table.", e); + } + } + + @Override + public void renameTable(String dbName, String oldName, String newName) + throws HCatException { + Table tbl; + try { + Table oldtbl = hmsClient.getTable(checkDB(dbName), oldName); + if (oldtbl != null) { + // TODO : Should be moved out. 
+ if (oldtbl + .getParameters() + .get(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE) != null) { + throw new HCatException( + "Cannot use rename command on a non-native table"); + } + tbl = new Table(oldtbl); + tbl.setTableName(newName); + hmsClient.alter_table(checkDB(dbName), oldName, tbl); + } + } catch (MetaException e) { + throw new HCatException("MetaException while renaming table", e); + } catch (NoSuchObjectException e) { + throw new ObjectNotFoundException( + "NoSuchObjectException while renaming table", e); + } catch (InvalidOperationException e) { + throw new HCatException( + "InvalidOperationException while renaming table", e); + } catch (TException e) { + throw new ConnectionFailureException( + "TException while renaming table", e); + } + } + + @Override + public List getPartitions(String dbName, String tblName) + throws HCatException { + List hcatPtns = new ArrayList(); + try { + List hivePtns = hmsClient.listPartitions( + checkDB(dbName), tblName, (short) -1); + for (Partition ptn : hivePtns) { + hcatPtns.add(new HCatPartition(ptn)); + } + } catch (NoSuchObjectException e) { + throw new ObjectNotFoundException( + "NoSuchObjectException while retrieving partition.", e); + } catch (MetaException e) { + throw new HCatException( + "MetaException while retrieving partition.", e); + } catch (TException e) { + throw new ConnectionFailureException( + "TException while retrieving partition.", e); + } + return hcatPtns; + } + + @Override + public List getPartitions(String dbName, String tblName, Map partitionSpec) throws HCatException { + return listPartitionsByFilter(dbName, tblName, getFilterString(partitionSpec)); + } + + private static String getFilterString(Map partitionSpec) { + final String AND = " AND "; + + StringBuilder filter = new StringBuilder(); + for (Map.Entry entry : partitionSpec.entrySet()) { + filter.append(entry.getKey()).append("=").append("\"").append(entry.getValue()).append("\"").append(AND); + } + + 
int length = filter.toString().length(); + if (length > 0) + filter.delete(length - AND.length(), length); + + return filter.toString(); + } + + @Override + public HCatPartition getPartition(String dbName, String tableName, + Map partitionSpec) throws HCatException { + HCatPartition partition = null; + try { + List partitionColumns = getTable(checkDB(dbName), tableName).getPartCols(); + if (partitionColumns.size() != partitionSpec.size()) { + throw new HCatException("Partition-spec doesn't have the right number of partition keys."); + } + + ArrayList ptnValues = new ArrayList(); + for (HCatFieldSchema partitionColumn : partitionColumns) { + String partKey = partitionColumn.getName(); + if (partitionSpec.containsKey(partKey)) { + ptnValues.add(partitionSpec.get(partKey)); // Partition-keys added in order. + } + else { + throw new HCatException("Invalid partition-key specified: " + partKey); + } + } + Partition hivePartition = hmsClient.getPartition(checkDB(dbName), + tableName, ptnValues); + if (hivePartition != null) { + partition = new HCatPartition(hivePartition); + } + } catch (MetaException e) { + throw new HCatException( + "MetaException while retrieving partition.", e); + } catch (NoSuchObjectException e) { + throw new ObjectNotFoundException( + "NoSuchObjectException while retrieving partition.", e); + } catch (TException e) { + throw new ConnectionFailureException( + "TException while retrieving partition.", e); + } + return partition; + } + + @Override + public void addPartition(HCatAddPartitionDesc partInfo) + throws HCatException { + Table tbl = null; + try { + tbl = hmsClient.getTable(partInfo.getDatabaseName(), + partInfo.getTableName()); + // TODO: Should be moved out. 
+ if (tbl.getPartitionKeysSize() == 0) { + throw new HCatException("The table " + partInfo.getTableName() + + " is not partitioned."); + } + + hmsClient.add_partition(partInfo.toHivePartition(tbl)); + } catch (InvalidObjectException e) { + throw new HCatException( + "InvalidObjectException while adding partition.", e); + } catch (AlreadyExistsException e) { + throw new HCatException( + "AlreadyExistsException while adding partition.", e); + } catch (MetaException e) { + throw new HCatException("MetaException while adding partition.", e); + } catch (NoSuchObjectException e) { + throw new ObjectNotFoundException("The table " + partInfo.getTableName() + + " could not be found.", e); // message names the table taken from partInfo + } catch (TException e) { + throw new ConnectionFailureException( + "TException while adding partition.", e); + } + } + + @Override + public void dropPartitions(String dbName, String tableName, + Map partitionSpec, boolean ifExists) + throws HCatException { + try { + dbName = checkDB(dbName); + List partitions = hmsClient.listPartitionsByFilter(dbName, tableName, + getFilterString(partitionSpec), (short)-1); + + for (Partition partition : partitions) { + dropPartition(partition, ifExists); + } + + } catch (NoSuchObjectException e) { + throw new ObjectNotFoundException( + "NoSuchObjectException while dropping partition.
" + + "Either db(" + dbName + ") or table(" + tableName + ") missing.", e); + } catch (MetaException e) { + throw new HCatException("MetaException while dropping partition.", + e); + } catch (TException e) { + throw new ConnectionFailureException( + "TException while dropping partition.", e); + } + } + + private void dropPartition(Partition partition, boolean ifExists) + throws HCatException, MetaException, TException { + try { + hmsClient.dropPartition(partition.getDbName(), partition.getTableName(), partition.getValues()); + } catch (NoSuchObjectException e) { + if (!ifExists) { + throw new ObjectNotFoundException( + "NoSuchObjectException while dropping partition: " + partition.getValues(), e); + } + } + } + + @Override + public List listPartitionsByFilter(String dbName, + String tblName, String filter) throws HCatException { + List hcatPtns = new ArrayList(); + try { + List hivePtns = hmsClient.listPartitionsByFilter( + checkDB(dbName), tblName, filter, (short) -1); + for (Partition ptn : hivePtns) { + hcatPtns.add(new HCatPartition(ptn)); + } + } catch (MetaException e) { + throw new HCatException("MetaException while fetching partitions.", + e); + } catch (NoSuchObjectException e) { + throw new ObjectNotFoundException( + "NoSuchObjectException while fetching partitions.", e); + } catch (TException e) { + throw new ConnectionFailureException( + "TException while fetching partitions.", e); + } + return hcatPtns; + } + + @Override + public void markPartitionForEvent(String dbName, String tblName, + Map partKVs, PartitionEventType eventType) + throws HCatException { + try { + hmsClient.markPartitionForEvent(checkDB(dbName), tblName, partKVs, + eventType); + } catch (MetaException e) { + throw new HCatException( + "MetaException while marking partition for event.", e); + } catch (NoSuchObjectException e) { + throw new ObjectNotFoundException( + "NoSuchObjectException while marking partition for event.", + e); + } catch (UnknownTableException e) { + throw new 
HCatException( + "UnknownTableException while marking partition for event.", + e); + } catch (UnknownDBException e) { + throw new HCatException( + "UnknownDBException while marking partition for event.", e); + } catch (TException e) { + throw new ConnectionFailureException( + "TException while marking partition for event.", e); + } + } + + @Override + public boolean isPartitionMarkedForEvent(String dbName, String tblName, + Map partKVs, PartitionEventType eventType) + throws HCatException { + boolean isMarked = false; + try { + isMarked = hmsClient.isPartitionMarkedForEvent(checkDB(dbName), + tblName, partKVs, eventType); + } catch (MetaException e) { + throw new HCatException( + "MetaException while checking partition for event.", e); + } catch (NoSuchObjectException e) { + throw new ObjectNotFoundException( + "NoSuchObjectException while checking partition for event.", + e); + } catch (UnknownTableException e) { + throw new HCatException( + "UnknownTableException while checking partition for event.", + e); + } catch (UnknownDBException e) { + throw new HCatException( + "UnknownDBException while checking partition for event.", e); + } catch (TException e) { + throw new ConnectionFailureException( + "TException while checking partition for event.", e); + } + return isMarked; + } + + @Override + public String getDelegationToken(String owner, + String renewerKerberosPrincipalName) throws HCatException { + String token = null; + try { + token = hmsClient.getDelegationToken(owner, + renewerKerberosPrincipalName); + } catch (MetaException e) { + throw new HCatException( + "MetaException while getting delegation token.", e); + } catch (TException e) { + throw new ConnectionFailureException( + "TException while getting delegation token.", e); + } + + return token; + } + + @Override + public long renewDelegationToken(String tokenStrForm) throws HCatException { + long time = 0; + try { + time = hmsClient.renewDelegationToken(tokenStrForm); + } catch (MetaException e) { + 
throw new HCatException( + "MetaException while renewing delegation token.", e); + } catch (TException e) { + throw new ConnectionFailureException( + "TException while renewing delegation token.", e); + } + + return time; + } + + @Override + public void cancelDelegationToken(String tokenStrForm) + throws HCatException { + try { + hmsClient.cancelDelegationToken(tokenStrForm); + } catch (MetaException e) { + throw new HCatException( + "MetaException while canceling delegation token.", e); + } catch (TException e) { + throw new ConnectionFailureException( + "TException while canceling delegation token.", e); + } + } + + /* + * @param conf /* @throws HCatException,ConnectionFailureException + * + * @see + * org.apache.hive.hcatalog.api.HCatClient#initialize(org.apache.hadoop.conf. + * Configuration) + */ + @Override + void initialize(Configuration conf) throws HCatException { + this.config = conf; + try { + hiveConfig = HCatUtil.getHiveConf(config); + hmsClient = HCatUtil.getHiveClient(hiveConfig); + } catch (MetaException exp) { + throw new HCatException("MetaException while creating HMS client", + exp); + } catch (IOException exp) { + throw new HCatException("IOException while creating HMS client", + exp); + } + + } + + private Table getHiveTableLike(String dbName, String existingTblName, + String newTableName, boolean isExternal, String location) + throws HCatException { + Table oldtbl = null; + Table newTable = null; + try { + oldtbl = hmsClient.getTable(checkDB(dbName), existingTblName); + } catch (MetaException e1) { + throw new HCatException( + "MetaException while retrieving existing table.", e1); + } catch (NoSuchObjectException e1) { + throw new ObjectNotFoundException( + "NoSuchObjectException while retrieving existing table.", + e1); + } catch (TException e1) { + throw new ConnectionFailureException( + "TException while retrieving existing table.", e1); + } + if (oldtbl != null) { + newTable = new Table(); + newTable.setTableName(newTableName); + 
newTable.setDbName(dbName); + StorageDescriptor sd = new StorageDescriptor(oldtbl.getSd()); + newTable.setSd(sd); + newTable.setParameters(oldtbl.getParameters()); + if (location == null) { + newTable.getSd().setLocation(oldtbl.getSd().getLocation()); + } else { + newTable.getSd().setLocation(location); + } + if (isExternal) { + newTable.putToParameters("EXTERNAL", "TRUE"); + newTable.setTableType(TableType.EXTERNAL_TABLE.toString()); + } else { + newTable.getParameters().remove("EXTERNAL"); + } + // set create time + newTable.setCreateTime((int) (System.currentTimeMillis() / 1000)); + newTable.setLastAccessTimeIsSet(false); + } + return newTable; + } + + /* + * @throws HCatException + * + * @see org.apache.hive.hcatalog.api.HCatClient#closeClient() + */ + @Override + public void close() throws HCatException { + hmsClient.close(); + } + + private String checkDB(String name) { + if (StringUtils.isEmpty(name)) { + return MetaStoreUtils.DEFAULT_DATABASE_NAME; + } else { + return name; + } + } + + /* + * @param partInfoList + * @return The size of the list of partitions. 
+ * @throws HCatException,ConnectionFailureException + * @see org.apache.hive.hcatalog.api.HCatClient#addPartitions(java.util.List) + */ + @Override + public int addPartitions(List partInfoList) + throws HCatException { + int numPartitions = -1; + if ((partInfoList == null) || (partInfoList.size() == 0)) { + throw new HCatException("The partition list is null or empty."); + } + + Table tbl = null; + try { + tbl = hmsClient.getTable(partInfoList.get(0).getDatabaseName(), + partInfoList.get(0).getTableName()); + ArrayList ptnList = new ArrayList(); + for (HCatAddPartitionDesc desc : partInfoList) { + ptnList.add(desc.toHivePartition(tbl)); + } + numPartitions = hmsClient.add_partitions(ptnList); + } catch (InvalidObjectException e) { + throw new HCatException( + "InvalidObjectException while adding partition.", e); + } catch (AlreadyExistsException e) { + throw new HCatException( + "AlreadyExistsException while adding partition.", e); + } catch (MetaException e) { + throw new HCatException("MetaException while adding partition.", e); + } catch (NoSuchObjectException e) { + throw new ObjectNotFoundException("The table " + + partInfoList.get(0).getTableName() + + " could not be found.", e); // table name is taken from the first descriptor, like the lookup above + } catch (TException e) { + throw new ConnectionFailureException( + "TException while adding partition.", e); + } + return numPartitions; + } + + @Override + public String getMessageBusTopicName(String dbName, String tableName) throws HCatException { + try { + return hmsClient.getTable(dbName, tableName).getParameters().get(HCatConstants.HCAT_MSGBUS_TOPIC_NAME); + } + catch (MetaException e) { + throw new HCatException("MetaException while retrieving JMS Topic name.", e); + } catch (NoSuchObjectException e) { + throw new HCatException("Could not find DB:" + dbName + " or Table:" + tableName, e); + } catch (TException e) { + throw new ConnectionFailureException( + "TException while retrieving JMS Topic name.", e); + } + } +} diff --git
hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatCreateDBDesc.java hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatCreateDBDesc.java new file mode 100644 index 0000000..acda55b --- /dev/null +++ hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatCreateDBDesc.java @@ -0,0 +1,194 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.api; + +import java.util.Map; + +import org.apache.hadoop.hive.metastore.api.Database; +import org.apache.hive.hcatalog.common.HCatException; + +/** + * The Class HCatCreateDBDesc for defining database attributes. + */ +public class HCatCreateDBDesc { + + private String dbName; + private String locationUri; + private String comment; + private Map dbProperties; + private boolean ifNotExits = false; + + /** + * Gets the database properties. + * + * @return the database properties + */ + public Map getDatabaseProperties() { + return this.dbProperties; + } + + /** + * Gets the if not exists. + * + * @return the if not exists + */ + public boolean getIfNotExists() { + return this.ifNotExits; + } + + /** + * Gets the comments. 
+ * + * @return the comments + */ + public String getComments() { + return this.comment; + } + + /** + * Gets the location. + * + * @return the location + */ + public String getLocation() { + return this.locationUri; + } + + /** + * Gets the database name. + * + * @return the database name + */ + public String getDatabaseName() { + return this.dbName; + } + + private HCatCreateDBDesc(String dbName) { + this.dbName = dbName; + } + + @Override + public String toString() { // every field, null or not, is followed by ", " so entries never run together + return "HCatCreateDBDesc [" + + (dbName != null ? "dbName=" + dbName + ", " : "dbName=null, ") + + (locationUri != null ? "location=" + locationUri + ", " + : "location=null, ") + + (comment != null ? "comment=" + comment + ", " : "comment=null, ") + + (dbProperties != null ? "dbProperties=" + dbProperties + ", " + : "dbProperties=null, ") + "ifNotExits=" + ifNotExits + "]"; + } + + /** + * Creates the builder for defining attributes. + * + * @param dbName the db name + * @return the builder + */ + public static Builder create(String dbName) { + return new Builder(dbName); + } + + Database toHiveDb() { + Database hiveDB = new Database(); + hiveDB.setDescription(this.comment); + hiveDB.setLocationUri(this.locationUri); + hiveDB.setName(this.dbName); + hiveDB.setParameters(this.dbProperties); + return hiveDB; + } + + public static class Builder { + + private String innerLoc; + private String innerComment; + private Map innerDBProps; + private String dbName; + private boolean ifNotExists = false; + + private Builder(String dbName) { + this.dbName = dbName; + } + + /** + * Location. + * + * @param value the location of the database. + * @return the builder + */ + public Builder location(String value) { + this.innerLoc = value; + return this; + } + + /** + * Comment. + * + * @param value comments. + * @return the builder + */ + public Builder comment(String value) { + this.innerComment = value; + return this; + } + + /** + * If not exists.
+ * @param ifNotExists If set to true, hive will not throw exception, if a + * database with the same name already exists. + * @return the builder + */ + public Builder ifNotExists(boolean ifNotExists) { + this.ifNotExists = ifNotExists; + return this; + } + + /** + * Database properties. + * + * @param dbProps the database properties + * @return the builder + */ + public Builder databaseProperties(Map dbProps) { + this.innerDBProps = dbProps; + return this; + } + + + /** + * Builds the create database descriptor. + * + * @return An instance of HCatCreateDBDesc + * @throws HCatException + */ + public HCatCreateDBDesc build() throws HCatException { + if (this.dbName == null) { + throw new HCatException("Database name cannot be null."); + } + HCatCreateDBDesc desc = new HCatCreateDBDesc(this.dbName); + desc.comment = this.innerComment; + desc.locationUri = this.innerLoc; + desc.dbProperties = this.innerDBProps; + desc.ifNotExits = this.ifNotExists; + return desc; + + } + + } + +} diff --git hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatCreateTableDesc.java hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatCreateTableDesc.java new file mode 100644 index 0000000..a28fa08 --- /dev/null +++ hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatCreateTableDesc.java @@ -0,0 +1,520 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.api; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.MetaStoreUtils; +import org.apache.hadoop.hive.metastore.TableType; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.Order; +import org.apache.hadoop.hive.metastore.api.SerDeInfo; +import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat; +import org.apache.hadoop.hive.ql.io.RCFileInputFormat; +import org.apache.hadoop.hive.ql.io.RCFileOutputFormat; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler; +import org.apache.hadoop.hive.ql.metadata.HiveUtils; +import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe; +import org.apache.hadoop.mapred.SequenceFileInputFormat; +import org.apache.hadoop.mapred.SequenceFileOutputFormat; +import org.apache.hadoop.mapred.TextInputFormat; +import org.apache.hive.hcatalog.common.HCatException; +import org.apache.hive.hcatalog.data.schema.HCatFieldSchema; +import org.apache.hive.hcatalog.data.schema.HCatSchemaUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * The Class HCatCreateTableDesc for defining attributes for a new table. 
+ */ +@SuppressWarnings("deprecation") +public class HCatCreateTableDesc { + + private static final Logger LOG = LoggerFactory.getLogger(HCatCreateTableDesc.class); + + private String tableName; + private String dbName; + private boolean isExternal; + private String comment; + private String location; + private List cols; + private List partCols; + private List bucketCols; + private int numBuckets; + private List sortCols; + private Map tblProps; + private boolean ifNotExists; + private String fileFormat; + private String inputformat; + private String outputformat; + private String serde; + private String storageHandler; + + private HCatCreateTableDesc(String dbName, String tableName, List columns) { + this.dbName = dbName; + this.tableName = tableName; + this.cols = columns; + } + + /** + * Creates a builder for defining attributes. + * + * @param dbName the db name + * @param tableName the table name + * @param columns the columns + * @return the builder + */ + public static Builder create(String dbName, String tableName, List columns) { + return new Builder(dbName, tableName, columns); + } + + Table toHiveTable(HiveConf conf) throws HCatException { + + Table newTable = new Table(); + newTable.setDbName(dbName); + newTable.setTableName(tableName); + if (tblProps != null) { + newTable.setParameters(tblProps); + } + + if (isExternal) { + newTable.putToParameters("EXTERNAL", "TRUE"); + newTable.setTableType(TableType.EXTERNAL_TABLE.toString()); + } else { + newTable.setTableType(TableType.MANAGED_TABLE.toString()); + } + + StorageDescriptor sd = new StorageDescriptor(); + sd.setSerdeInfo(new SerDeInfo()); + if (location != null) { + sd.setLocation(location); + } + if (this.comment != null) { + newTable.putToParameters("comment", comment); + } + if (!StringUtils.isEmpty(fileFormat)) { + sd.setInputFormat(inputformat); + sd.setOutputFormat(outputformat); + if (serde != null) { + sd.getSerdeInfo().setSerializationLib(serde); + } else { + LOG.info("Using LazySimpleSerDe 
for table " + tableName); + sd.getSerdeInfo() + .setSerializationLib( + org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.class + .getName()); + } + } else { + try { + LOG.info("Creating instance of storage handler to get input/output, serder info."); + HiveStorageHandler sh = HiveUtils.getStorageHandler(conf, + storageHandler); + sd.setInputFormat(sh.getInputFormatClass().getName()); + sd.setOutputFormat(sh.getOutputFormatClass().getName()); + sd.getSerdeInfo().setSerializationLib( + sh.getSerDeClass().getName()); + newTable.putToParameters( + org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE, + storageHandler); + } catch (HiveException e) { + throw new HCatException( + "Exception while creating instance of storage handler", + e); + } + } + newTable.setSd(sd); + if (this.partCols != null) { + ArrayList hivePtnCols = new ArrayList(); + for (HCatFieldSchema fs : this.partCols) { + hivePtnCols.add(HCatSchemaUtils.getFieldSchema(fs)); + } + newTable.setPartitionKeys(hivePtnCols); + } + + if (this.cols != null) { + ArrayList hiveTblCols = new ArrayList(); + for (HCatFieldSchema fs : this.cols) { + hiveTblCols.add(HCatSchemaUtils.getFieldSchema(fs)); + } + newTable.getSd().setCols(hiveTblCols); + } + + if (this.bucketCols != null) { + newTable.getSd().setBucketCols(bucketCols); + newTable.getSd().setNumBuckets(numBuckets); + } + + if (this.sortCols != null) { + newTable.getSd().setSortCols(sortCols); + } + + newTable.setCreateTime((int) (System.currentTimeMillis() / 1000)); + newTable.setLastAccessTimeIsSet(false); + return newTable; + } + + /** + * Gets the if not exists. + * + * @return the if not exists + */ + public boolean getIfNotExists() { + return this.ifNotExists; + } + + /** + * Gets the table name. + * + * @return the table name + */ + public String getTableName() { + return this.tableName; + } + + /** + * Gets the cols. 
+ * + * @return the cols + */ + public List getCols() { + return this.cols; + } + + /** + * Gets the partition cols. + * + * @return the partition cols + */ + public List getPartitionCols() { + return this.partCols; + } + + /** + * Gets the bucket cols. + * + * @return the bucket cols + */ + public List getBucketCols() { + return this.bucketCols; + } + + public int getNumBuckets() { + return this.numBuckets; + } + + /** + * Gets the comments. + * + * @return the comments + */ + public String getComments() { + return this.comment; + } + + /** + * Gets the storage handler. + * + * @return the storage handler + */ + public String getStorageHandler() { + return this.storageHandler; + } + + /** + * Gets the location. + * + * @return the location + */ + public String getLocation() { + return this.location; + } + + /** + * Gets the external. + * + * @return the external + */ + public boolean getExternal() { + return this.isExternal; + } + + /** + * Gets the sort cols. + * + * @return the sort cols + */ + public List getSortCols() { + return this.sortCols; + } + + /** + * Gets the tbl props. + * + * @return the tbl props + */ + public Map getTblProps() { + return this.tblProps; + } + + /** + * Gets the file format. + * + * @return the file format + */ + public String getFileFormat() { + return this.fileFormat; + } + + /** + * Gets the database name. + * + * @return the database name + */ + public String getDatabaseName() { + return this.dbName; + } + + @Override + public String toString() { + return "HCatCreateTableDesc [" + + (tableName != null ? "tableName=" + tableName + ", " : "tableName=null") + + (dbName != null ? "dbName=" + dbName + ", " : "dbName=null") + + "isExternal=" + + isExternal + + ", " + + (comment != null ? "comment=" + comment + ", " : "comment=null") + + (location != null ? "location=" + location + ", " : "location=null") + + (cols != null ? "cols=" + cols + ", " : "cols=null") + + (partCols != null ? 
"partCols=" + partCols + ", " : "partCols=null") + + (bucketCols != null ? "bucketCols=" + bucketCols + ", " : "bucketCols=null") + + "numBuckets=" + + numBuckets + + ", " + + (sortCols != null ? "sortCols=" + sortCols + ", " : "sortCols=null") + + (tblProps != null ? "tblProps=" + tblProps + ", " : "tblProps=null") + + "ifNotExists=" + + ifNotExists + + ", " + + (fileFormat != null ? "fileFormat=" + fileFormat + ", " : "fileFormat=null") + + (inputformat != null ? "inputformat=" + inputformat + ", " + : "inputformat=null") + + (outputformat != null ? "outputformat=" + outputformat + ", " + : "outputformat=null") + + (serde != null ? "serde=" + serde + ", " : "serde=null") + + (storageHandler != null ? "storageHandler=" + storageHandler + : "storageHandler=null") + "]"; + } + + public static class Builder { + + private String tableName; + private boolean isExternal; + private List cols; + private List partCols; + private List bucketCols; + private List sortCols; + private int numBuckets; + private String comment; + private String fileFormat; + private String location; + private String storageHandler; + private Map tblProps; + private boolean ifNotExists; + private String dbName; + + + private Builder(String dbName, String tableName, List columns) { + this.dbName = dbName; + this.tableName = tableName; + this.cols = columns; + } + + + /** + * If not exists. + * + * @param ifNotExists If set to true, hive will not throw exception, if a + * table with the same name already exists. + * @return the builder + */ + public Builder ifNotExists(boolean ifNotExists) { + this.ifNotExists = ifNotExists; + return this; + } + + + /** + * Partition cols. + * + * @param partCols the partition cols + * @return the builder + */ + public Builder partCols(List partCols) { + this.partCols = partCols; + return this; + } + + + /** + * Bucket cols. 
+ * + * @param bucketCols the bucket cols + * @return the builder + */ + public Builder bucketCols(List bucketCols, int buckets) { + this.bucketCols = bucketCols; + this.numBuckets = buckets; + return this; + } + + /** + * Storage handler. + * + * @param storageHandler the storage handler + * @return the builder + */ + public Builder storageHandler(String storageHandler) { + this.storageHandler = storageHandler; + return this; + } + + /** + * Location. + * + * @param location the location + * @return the builder + */ + public Builder location(String location) { + this.location = location; + return this; + } + + /** + * Comments. + * + * @param comment the comment + * @return the builder + */ + public Builder comments(String comment) { + this.comment = comment; + return this; + } + + /** + * Checks if is table external. + * + * @param isExternal the is external + * @return the builder + */ + public Builder isTableExternal(boolean isExternal) { + this.isExternal = isExternal; + return this; + } + + /** + * Sort cols. + * + * @param sortCols the sort cols + * @return the builder + */ + public Builder sortCols(ArrayList sortCols) { + this.sortCols = sortCols; + return this; + } + + /** + * Tbl props. + * + * @param tblProps the tbl props + * @return the builder + */ + public Builder tblProps(Map tblProps) { + this.tblProps = tblProps; + return this; + } + + /** + * File format. + * + * @param format the format + * @return the builder + */ + public Builder fileFormat(String format) { + this.fileFormat = format; + return this; + } + + /** + * Builds the HCatCreateTableDesc. + * + * @return HCatCreateTableDesc + * @throws HCatException + */ + public HCatCreateTableDesc build() throws HCatException { + if (this.dbName == null) { + LOG.info("Database name found null. 
Setting db to :" + + MetaStoreUtils.DEFAULT_DATABASE_NAME); + this.dbName = MetaStoreUtils.DEFAULT_DATABASE_NAME; + } + HCatCreateTableDesc desc = new HCatCreateTableDesc(this.dbName, + this.tableName, this.cols); + desc.ifNotExists = this.ifNotExists; + desc.isExternal = this.isExternal; + desc.comment = this.comment; + desc.partCols = this.partCols; + desc.bucketCols = this.bucketCols; + desc.numBuckets = this.numBuckets; + desc.location = this.location; + desc.tblProps = this.tblProps; + desc.sortCols = this.sortCols; + desc.serde = null; + if (!StringUtils.isEmpty(fileFormat)) { + desc.fileFormat = fileFormat; + if ("SequenceFile".equalsIgnoreCase(fileFormat)) { + desc.inputformat = SequenceFileInputFormat.class.getName(); + desc.outputformat = SequenceFileOutputFormat.class + .getName(); + } else if ("RCFile".equalsIgnoreCase(fileFormat)) { + desc.inputformat = RCFileInputFormat.class.getName(); + desc.outputformat = RCFileOutputFormat.class.getName(); + desc.serde = ColumnarSerDe.class.getName(); + } + desc.storageHandler = StringUtils.EMPTY; + } else if (!StringUtils.isEmpty(storageHandler)) { + desc.storageHandler = storageHandler; + } else { + desc.fileFormat = "TextFile"; + LOG.info("Using text file format for the table."); + desc.inputformat = TextInputFormat.class.getName(); + LOG.info("Table input format:" + desc.inputformat); + desc.outputformat = IgnoreKeyTextOutputFormat.class + .getName(); + LOG.info("Table output format:" + desc.outputformat); + } + return desc; + } + } +} diff --git hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatDatabase.java hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatDatabase.java new file mode 100644 index 0000000..4a0b935 --- /dev/null +++ hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatDatabase.java @@ -0,0 +1,88 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.api; + +import java.util.Map; + +import org.apache.hadoop.hive.metastore.api.Database; + + +/** + * HCatDatabase is wrapper class around org.apache.hadoop.hive.metastore.api.Database. + */ +public class HCatDatabase { + + private String dbName; + private String dbLocation; + private String comment; + private Map props; + + HCatDatabase(Database db) { + this.dbName = db.getName(); + this.props = db.getParameters(); + this.dbLocation = db.getLocationUri(); + this.comment = db.getDescription(); + } + + /** + * Gets the database name. + * + * @return the database name + */ + public String getName() { + return dbName; + } + + /** + * Gets the dB location. + * + * @return the dB location + */ + public String getLocation() { + return dbLocation; + } + + /** + * Gets the comment. + * + * @return the comment + */ + public String getComment() { + return comment; + } + + /** + * Gets the dB properties. + * + * @return the dB properties + */ + public Map getProperties() { + return props; + } + + @Override + public String toString() { + return "HCatDatabase [" + + (dbName != null ? "dbName=" + dbName + ", " : "dbName=null") + + (dbLocation != null ? "dbLocation=" + dbLocation + ", " : "dbLocation=null") + + (comment != null ? 
"comment=" + comment + ", " : "comment=null") + + (props != null ? "props=" + props : "props=null") + "]"; + } + +} diff --git hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatPartition.java hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatPartition.java new file mode 100644 index 0000000..99a6d6f --- /dev/null +++ hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatPartition.java @@ -0,0 +1,204 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.api; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.Order; +import org.apache.hadoop.hive.metastore.api.Partition; +import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hive.hcatalog.common.HCatException; +import org.apache.hive.hcatalog.data.schema.HCatFieldSchema; +import org.apache.hive.hcatalog.data.schema.HCatSchemaUtils; + +/** + * The HCatPartition is a wrapper around org.apache.hadoop.hive.metastore.api.Partition. 
+ */ +public class HCatPartition { + + private String tableName; + private String dbName; + private List values; + private List tableCols; + private int createTime; + private int lastAccessTime; + private StorageDescriptor sd; + private Map parameters; + + HCatPartition(Partition partition) throws HCatException { + this.tableName = partition.getTableName(); + this.dbName = partition.getDbName(); + this.createTime = partition.getCreateTime(); + this.lastAccessTime = partition.getLastAccessTime(); + this.parameters = partition.getParameters(); + this.values = partition.getValues(); + this.sd = partition.getSd(); + this.tableCols = new ArrayList(); + for (FieldSchema fs : this.sd.getCols()) { + this.tableCols.add(HCatSchemaUtils.getHCatFieldSchema(fs)); + } + } + + /** + * Gets the table name. + * + * @return the table name + */ + public String getTableName() { + return this.tableName; + } + + /** + * Gets the database name. + * + * @return the database name + */ + public String getDatabaseName() { + return this.dbName; + } + + /** + * Gets the columns of the table. + * + * @return the columns + */ + public List getColumns() { + return this.tableCols; + } + + /** + * Gets the input format. + * + * @return the input format + */ + public String getInputFormat() { + return this.sd.getInputFormat(); + } + + /** + * Gets the output format. + * + * @return the output format + */ + public String getOutputFormat() { + return this.sd.getOutputFormat(); + } + + /** + * Gets the storage handler. + * + * @return the storage handler + */ + public String getStorageHandler() { + return this.sd + .getParameters() + .get(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE); + } + + /** + * Gets the location. + * + * @return the location + */ + public String getLocation() { + return this.sd.getLocation(); + } + + /** + * Gets the serde. 
+ * + * @return the serde + */ + public String getSerDe() { + return this.sd.getSerdeInfo().getSerializationLib(); + } + + public Map getParameters() { + return this.parameters; + } + + /** + * Gets the last access time. + * + * @return the last access time + */ + public int getLastAccessTime() { + return this.lastAccessTime; + } + + /** + * Gets the creates the time. + * + * @return the creates the time + */ + public int getCreateTime() { + return this.createTime; + } + + /** + * Gets the values. + * + * @return the values + */ + public List getValues() { + return this.values; + } + + /** + * Gets the bucket columns. + * + * @return the bucket columns + */ + public List getBucketCols() { + return this.sd.getBucketCols(); + } + + /** + * Gets the number of buckets. + * + * @return the number of buckets + */ + public int getNumBuckets() { + return this.sd.getNumBuckets(); + } + + /** + * Gets the sort columns. + * + * @return the sort columns + */ + public List getSortCols() { + return this.sd.getSortCols(); + } + + @Override + public String toString() { + return "HCatPartition [" + + (tableName != null ? "tableName=" + tableName + ", " : "tableName=null") + + (dbName != null ? "dbName=" + dbName + ", " : "dbName=null") + + (values != null ? "values=" + values + ", " : "values=null") + + "createTime=" + createTime + ", lastAccessTime=" + + lastAccessTime + ", " + (sd != null ? "sd=" + sd + ", " : "sd=null") + + (parameters != null ? "parameters=" + parameters : "parameters=null") + "]"; + } + +} diff --git hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatTable.java hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatTable.java new file mode 100644 index 0000000..e43227b --- /dev/null +++ hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/HCatTable.java @@ -0,0 +1,227 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.api; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.Order; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hive.hcatalog.common.HCatException; +import org.apache.hive.hcatalog.data.schema.HCatFieldSchema; +import org.apache.hive.hcatalog.data.schema.HCatSchemaUtils; + +/** + * The HCatTable is a wrapper around org.apache.hadoop.hive.metastore.api.Table. 
+ */ +public class HCatTable { + + private String tableName; + private String tabletype; + private List cols; + private List partCols; + private List bucketCols; + private List sortCols; + private int numBuckets; + private String inputFileFormat; + private String outputFileFormat; + private String storageHandler; + private Map tblProps; + private String dbName; + private String serde; + private String location; + + HCatTable(Table hiveTable) throws HCatException { + this.tableName = hiveTable.getTableName(); + this.dbName = hiveTable.getDbName(); + this.tabletype = hiveTable.getTableType(); + cols = new ArrayList(); + for (FieldSchema colFS : hiveTable.getSd().getCols()) { + cols.add(HCatSchemaUtils.getHCatFieldSchema(colFS)); + } + partCols = new ArrayList(); + for (FieldSchema colFS : hiveTable.getPartitionKeys()) { + partCols.add(HCatSchemaUtils.getHCatFieldSchema(colFS)); + } + bucketCols = hiveTable.getSd().getBucketCols(); + sortCols = hiveTable.getSd().getSortCols(); + numBuckets = hiveTable.getSd().getNumBuckets(); + inputFileFormat = hiveTable.getSd().getInputFormat(); + outputFileFormat = hiveTable.getSd().getOutputFormat(); + storageHandler = hiveTable + .getSd() + .getParameters() + .get(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE); + tblProps = hiveTable.getParameters(); + serde = hiveTable.getSd().getSerdeInfo().getSerializationLib(); + location = hiveTable.getSd().getLocation(); + } + + /** + * Gets the table name. + * + * @return the table name + */ + public String getTableName() { + return tableName; + } + + /** + * Gets the db name. + * + * @return the db name + */ + public String getDbName() { + return dbName; + } + + /** + * Gets the columns. + * + * @return the columns + */ + public List getCols() { + return cols; + } + + /** + * Gets the part columns. + * + * @return the part columns + */ + public List getPartCols() { + return partCols; + } + + /** + * Gets the bucket columns. 
+ * + * @return the bucket columns + */ + public List getBucketCols() { + return bucketCols; + } + + /** + * Gets the sort columns. + * + * @return the sort columns + */ + public List getSortCols() { + return sortCols; + } + + /** + * Gets the number of buckets. + * + * @return the number of buckets + */ + public int getNumBuckets() { + return numBuckets; + } + + /** + * Gets the storage handler. + * + * @return the storage handler + */ + public String getStorageHandler() { + return storageHandler; + } + + /** + * Gets the table props. + * + * @return the table props + */ + public Map getTblProps() { + return tblProps; + } + + /** + * Gets the tabletype. + * + * @return the tabletype + */ + public String getTabletype() { + return tabletype; + } + + /** + * Gets the input file format. + * + * @return the input file format + */ + public String getInputFileFormat() { + return inputFileFormat; + } + + /** + * Gets the output file format. + * + * @return the output file format + */ + public String getOutputFileFormat() { + return outputFileFormat; + } + + /** + * Gets the serde lib. + * + * @return the serde lib + */ + public String getSerdeLib() { + return serde; + } + + /** + * Gets the location. + * + * @return the location + */ + public String getLocation() { + return location; + } + + @Override + public String toString() { + return "HCatTable [" + + (tableName != null ? "tableName=" + tableName + ", " : "tableName=null") + + (dbName != null ? "dbName=" + dbName + ", " : "dbName=null") + + (tabletype != null ? "tabletype=" + tabletype + ", " : "tabletype=null") + + (cols != null ? "cols=" + cols + ", " : "cols=null") + + (partCols != null ? "partCols=" + partCols + ", " : "partCols==null") + + (bucketCols != null ? "bucketCols=" + bucketCols + ", " : "bucketCols=null") + + (sortCols != null ? "sortCols=" + sortCols + ", " : "sortCols=null") + + "numBuckets=" + + numBuckets + + ", " + + (inputFileFormat != null ? 
"inputFileFormat=" + + inputFileFormat + ", " : "inputFileFormat=null") + + (outputFileFormat != null ? "outputFileFormat=" + + outputFileFormat + ", " : "outputFileFormat=null") + + (storageHandler != null ? "storageHandler=" + storageHandler + + ", " : "storageHandler=null") + + (tblProps != null ? "tblProps=" + tblProps + ", " : "tblProps=null") + + (serde != null ? "serde=" + serde + ", " : "serde=") + + (location != null ? "location=" + location : "location=") + "]"; + } +} diff --git hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/ObjectNotFoundException.java hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/ObjectNotFoundException.java new file mode 100644 index 0000000..af6815a --- /dev/null +++ hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/ObjectNotFoundException.java @@ -0,0 +1,39 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.api; + +import org.apache.hive.hcatalog.common.HCatException; + +/** + * This exception is thrown when a Database, Table or Partition + * specified in an HCatalog query is not found. 
+ */ +public class ObjectNotFoundException extends HCatException { + + private static final long serialVersionUID = 1L; + + /** + * @param message Exception message. + * @param cause The wrapped Throwable that caused this exception. + */ + public ObjectNotFoundException(String message, Throwable cause) { + super(message, cause); + } +} diff --git hcatalog/webhcat/java-client/src/test/java/org/apache/hcatalog/api/TestHCatClient.java hcatalog/webhcat/java-client/src/test/java/org/apache/hcatalog/api/TestHCatClient.java deleted file mode 100644 index fadbf5e..0000000 --- hcatalog/webhcat/java-client/src/test/java/org/apache/hcatalog/api/TestHCatClient.java +++ /dev/null @@ -1,640 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.hcatalog.api; - -import java.math.BigInteger; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Random; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.HiveMetaStore; -import org.apache.hadoop.hive.metastore.api.PartitionEventType; -import org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat; -import org.apache.hadoop.hive.ql.io.RCFileInputFormat; -import org.apache.hadoop.hive.ql.io.RCFileOutputFormat; -import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe; -import org.apache.hadoop.mapred.TextInputFormat; -import org.apache.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzer; -import org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.apache.hcatalog.common.HCatConstants; -import org.apache.hcatalog.common.HCatException; -import org.apache.hcatalog.data.schema.HCatFieldSchema; -import org.apache.hcatalog.data.schema.HCatFieldSchema.Type; -import org.apache.hcatalog.NoExitSecurityManager; -import org.junit.AfterClass; -import org.junit.BeforeClass; -import org.junit.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.assertArrayEquals; - -public class TestHCatClient { - private static final Logger LOG = LoggerFactory.getLogger(TestHCatClient.class); - private static final String msPort = "20101"; - private static HiveConf hcatConf; - private static SecurityManager securityManager; - - private static class RunMS implements Runnable { - - @Override - public void run() { - try { - HiveMetaStore.main(new String[]{"-v", "-p", msPort}); - } catch (Throwable t) { - LOG.error("Exiting. 
Got exception from metastore: ", t); - } - } - } - - @AfterClass - public static void tearDown() throws Exception { - LOG.info("Shutting down metastore."); - System.setSecurityManager(securityManager); - } - - @BeforeClass - public static void startMetaStoreServer() throws Exception { - - Thread t = new Thread(new RunMS()); - t.start(); - Thread.sleep(40000); - - securityManager = System.getSecurityManager(); - System.setSecurityManager(new NoExitSecurityManager()); - hcatConf = new HiveConf(TestHCatClient.class); - hcatConf.set("hive.metastore.local", "false"); - hcatConf.setVar(HiveConf.ConfVars.METASTOREURIS, "thrift://localhost:" - + msPort); - hcatConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES, 3); - hcatConf.set(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname, - HCatSemanticAnalyzer.class.getName()); - hcatConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); - hcatConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); - hcatConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, - "false"); - System.setProperty(HiveConf.ConfVars.PREEXECHOOKS.varname, " "); - System.setProperty(HiveConf.ConfVars.POSTEXECHOOKS.varname, " "); - } - - @Test - public void testBasicDDLCommands() throws Exception { - String db = "testdb"; - String tableOne = "testTable1"; - String tableTwo = "testTable2"; - HCatClient client = HCatClient.create(new Configuration(hcatConf)); - client.dropDatabase(db, true, HCatClient.DropDBMode.CASCADE); - - HCatCreateDBDesc dbDesc = HCatCreateDBDesc.create(db).ifNotExists(false) - .build(); - client.createDatabase(dbDesc); - List dbNames = client.listDatabaseNamesByPattern("*"); - assertTrue(dbNames.contains("default")); - assertTrue(dbNames.contains(db)); - - HCatDatabase testDb = client.getDatabase(db); - assertTrue(testDb.getComment() == null); - assertTrue(testDb.getProperties().size() == 0); - String warehouseDir = System - .getProperty(ConfVars.METASTOREWAREHOUSE.varname, "/user/hive/warehouse"); - String expectedDir 
= warehouseDir.replaceAll("\\\\", "/"); - if (!expectedDir.startsWith("/")) { - expectedDir = "/" + expectedDir; - } - assertTrue(testDb.getLocation().equals( - "file:" + expectedDir + "/" + db + ".db")); - ArrayList cols = new ArrayList(); - cols.add(new HCatFieldSchema("id", Type.INT, "id comment")); - cols.add(new HCatFieldSchema("value", Type.STRING, "value comment")); - HCatCreateTableDesc tableDesc = HCatCreateTableDesc - .create(db, tableOne, cols).fileFormat("rcfile").build(); - client.createTable(tableDesc); - HCatTable table1 = client.getTable(db, tableOne); - assertTrue(table1.getInputFileFormat().equalsIgnoreCase( - RCFileInputFormat.class.getName())); - assertTrue(table1.getOutputFileFormat().equalsIgnoreCase( - RCFileOutputFormat.class.getName())); - assertTrue(table1.getSerdeLib().equalsIgnoreCase( - ColumnarSerDe.class.getName())); - assertTrue(table1.getCols().equals(cols)); - // Since "ifexists" was not set to true, trying to create the same table - // again - // will result in an exception. 
- try { - client.createTable(tableDesc); - } catch (HCatException e) { - assertTrue(e.getMessage().contains( - "AlreadyExistsException while creating table.")); - } - - client.dropTable(db, tableOne, true); - HCatCreateTableDesc tableDesc2 = HCatCreateTableDesc.create(db, - tableTwo, cols).build(); - client.createTable(tableDesc2); - HCatTable table2 = client.getTable(db, tableTwo); - assertTrue(table2.getInputFileFormat().equalsIgnoreCase( - TextInputFormat.class.getName())); - assertTrue(table2.getOutputFileFormat().equalsIgnoreCase( - IgnoreKeyTextOutputFormat.class.getName())); - assertTrue(table2.getLocation().equalsIgnoreCase( - "file:" + expectedDir + "/" + db + ".db/" + tableTwo)); - client.close(); - } - - @Test - public void testPartitionsHCatClientImpl() throws Exception { - HCatClient client = HCatClient.create(new Configuration(hcatConf)); - String dbName = "ptnDB"; - String tableName = "pageView"; - client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE); - - HCatCreateDBDesc dbDesc = HCatCreateDBDesc.create(dbName) - .ifNotExists(true).build(); - client.createDatabase(dbDesc); - ArrayList cols = new ArrayList(); - cols.add(new HCatFieldSchema("userid", Type.INT, "id columns")); - cols.add(new HCatFieldSchema("viewtime", Type.BIGINT, - "view time columns")); - cols.add(new HCatFieldSchema("pageurl", Type.STRING, "")); - cols.add(new HCatFieldSchema("ip", Type.STRING, - "IP Address of the User")); - - ArrayList ptnCols = new ArrayList(); - ptnCols.add(new HCatFieldSchema("dt", Type.STRING, "date column")); - ptnCols.add(new HCatFieldSchema("country", Type.STRING, - "country column")); - HCatCreateTableDesc tableDesc = HCatCreateTableDesc - .create(dbName, tableName, cols).fileFormat("sequencefile") - .partCols(ptnCols).build(); - client.createTable(tableDesc); - - Map firstPtn = new HashMap(); - firstPtn.put("dt", "04/30/2012"); - firstPtn.put("country", "usa"); - HCatAddPartitionDesc addPtn = HCatAddPartitionDesc.create(dbName, - tableName, 
null, firstPtn).build(); - client.addPartition(addPtn); - - Map secondPtn = new HashMap(); - secondPtn.put("dt", "04/12/2012"); - secondPtn.put("country", "brazil"); - HCatAddPartitionDesc addPtn2 = HCatAddPartitionDesc.create(dbName, - tableName, null, secondPtn).build(); - client.addPartition(addPtn2); - - Map thirdPtn = new HashMap(); - thirdPtn.put("dt", "04/13/2012"); - thirdPtn.put("country", "argentina"); - HCatAddPartitionDesc addPtn3 = HCatAddPartitionDesc.create(dbName, - tableName, null, thirdPtn).build(); - client.addPartition(addPtn3); - - List ptnList = client.listPartitionsByFilter(dbName, - tableName, null); - assertTrue(ptnList.size() == 3); - - HCatPartition ptn = client.getPartition(dbName, tableName, firstPtn); - assertTrue(ptn != null); - - client.dropPartitions(dbName, tableName, firstPtn, true); - ptnList = client.listPartitionsByFilter(dbName, - tableName, null); - assertTrue(ptnList.size() == 2); - - List ptnListTwo = client.listPartitionsByFilter(dbName, - tableName, "country = \"argentina\""); - assertTrue(ptnListTwo.size() == 1); - - client.markPartitionForEvent(dbName, tableName, thirdPtn, - PartitionEventType.LOAD_DONE); - boolean isMarked = client.isPartitionMarkedForEvent(dbName, tableName, - thirdPtn, PartitionEventType.LOAD_DONE); - assertTrue(isMarked); - client.close(); - } - - @Test - public void testDatabaseLocation() throws Exception { - HCatClient client = HCatClient.create(new Configuration(hcatConf)); - String dbName = "locationDB"; - client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE); - - HCatCreateDBDesc dbDesc = HCatCreateDBDesc.create(dbName) - .ifNotExists(true).location("/tmp/" + dbName).build(); - client.createDatabase(dbDesc); - HCatDatabase newDB = client.getDatabase(dbName); - assertTrue(newDB.getLocation().equalsIgnoreCase("file:/tmp/" + dbName)); - client.close(); - } - - @Test - public void testCreateTableLike() throws Exception { - HCatClient client = HCatClient.create(new 
Configuration(hcatConf)); - String tableName = "tableone"; - String cloneTable = "tabletwo"; - client.dropTable(null, tableName, true); - client.dropTable(null, cloneTable, true); - - ArrayList cols = new ArrayList(); - cols.add(new HCatFieldSchema("id", Type.INT, "id columns")); - cols.add(new HCatFieldSchema("value", Type.STRING, "id columns")); - HCatCreateTableDesc tableDesc = HCatCreateTableDesc - .create(null, tableName, cols).fileFormat("rcfile").build(); - client.createTable(tableDesc); - // create a new table similar to previous one. - client.createTableLike(null, tableName, cloneTable, true, false, null); - List tables = client.listTableNamesByPattern(null, "table*"); - assertTrue(tables.size() == 2); - client.close(); - } - - @Test - public void testRenameTable() throws Exception { - HCatClient client = HCatClient.create(new Configuration(hcatConf)); - String tableName = "temptable"; - String newName = "mytable"; - client.dropTable(null, tableName, true); - client.dropTable(null, newName, true); - ArrayList cols = new ArrayList(); - cols.add(new HCatFieldSchema("id", Type.INT, "id columns")); - cols.add(new HCatFieldSchema("value", Type.STRING, "id columns")); - HCatCreateTableDesc tableDesc = HCatCreateTableDesc - .create(null, tableName, cols).fileFormat("rcfile").build(); - client.createTable(tableDesc); - client.renameTable(null, tableName, newName); - try { - client.getTable(null, tableName); - } catch (HCatException exp) { - assertTrue("Unexpected exception message: " + exp.getMessage(), - exp.getMessage().contains("NoSuchObjectException while fetching table")); - } - HCatTable newTable = client.getTable(null, newName); - assertTrue(newTable != null); - assertTrue(newTable.getTableName().equals(newName)); - client.close(); - } - - @Test - public void testTransportFailure() throws Exception { - HCatClient client = HCatClient.create(new Configuration(hcatConf)); - boolean isExceptionCaught = false; - // Table creation with a long table name causes 
ConnectionFailureException - final String tableName = "Temptable" + new BigInteger(200, new Random()).toString(2); - - ArrayList cols = new ArrayList(); - cols.add(new HCatFieldSchema("id", Type.INT, "id columns")); - cols.add(new HCatFieldSchema("value", Type.STRING, "id columns")); - try { - HCatCreateTableDesc tableDesc = HCatCreateTableDesc - .create(null, tableName, cols).fileFormat("rcfile").build(); - client.createTable(tableDesc); - } catch (Exception exp) { - isExceptionCaught = true; - assertEquals("Unexpected exception type.", HCatException.class, exp.getClass()); - // The connection was closed, so create a new one. - client = HCatClient.create(new Configuration(hcatConf)); - String newName = "goodTable"; - client.dropTable(null, newName, true); - HCatCreateTableDesc tableDesc2 = HCatCreateTableDesc - .create(null, newName, cols).fileFormat("rcfile").build(); - client.createTable(tableDesc2); - HCatTable newTable = client.getTable(null, newName); - assertTrue(newTable != null); - assertTrue(newTable.getTableName().equalsIgnoreCase(newName)); - - } finally { - client.close(); - assertTrue("The expected exception was never thrown.", isExceptionCaught); - } - } - - @Test - public void testOtherFailure() throws Exception { - HCatClient client = HCatClient.create(new Configuration(hcatConf)); - String tableName = "Temptable"; - boolean isExceptionCaught = false; - client.dropTable(null, tableName, true); - ArrayList cols = new ArrayList(); - cols.add(new HCatFieldSchema("id", Type.INT, "id columns")); - cols.add(new HCatFieldSchema("value", Type.STRING, "id columns")); - try { - HCatCreateTableDesc tableDesc = HCatCreateTableDesc - .create(null, tableName, cols).fileFormat("rcfile").build(); - client.createTable(tableDesc); - // The DB foo is non-existent. 
- client.getTable("foo", tableName); - } catch (Exception exp) { - isExceptionCaught = true; - assertTrue(exp instanceof HCatException); - String newName = "goodTable"; - client.dropTable(null, newName, true); - HCatCreateTableDesc tableDesc2 = HCatCreateTableDesc - .create(null, newName, cols).fileFormat("rcfile").build(); - client.createTable(tableDesc2); - HCatTable newTable = client.getTable(null, newName); - assertTrue(newTable != null); - assertTrue(newTable.getTableName().equalsIgnoreCase(newName)); - } finally { - client.close(); - assertTrue("The expected exception was never thrown.", isExceptionCaught); - } - } - - @Test - public void testDropTableException() throws Exception { - HCatClient client = HCatClient.create(new Configuration(hcatConf)); - String tableName = "tableToBeDropped"; - boolean isExceptionCaught = false; - client.dropTable(null, tableName, true); - try { - client.dropTable(null, tableName, false); - } catch (Exception exp) { - isExceptionCaught = true; - assertTrue(exp instanceof HCatException); - LOG.info("Drop Table Exception: " + exp.getCause()); - } finally { - client.close(); - assertTrue("The expected exception was never thrown.", isExceptionCaught); - } - } - - @Test - public void testUpdateTableSchema() throws Exception { - try { - HCatClient client = HCatClient.create(new Configuration(hcatConf)); - final String dbName = "testUpdateTableSchema_DBName"; - final String tableName = "testUpdateTableSchema_TableName"; - - client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE); - - client.createDatabase(HCatCreateDBDesc.create(dbName).build()); - List oldSchema = Arrays.asList(new HCatFieldSchema("foo", Type.INT, ""), - new HCatFieldSchema("bar", Type.STRING, "")); - client.createTable(HCatCreateTableDesc.create(dbName, tableName, oldSchema).build()); - - List newSchema = Arrays.asList(new HCatFieldSchema("completely", Type.DOUBLE, ""), - new HCatFieldSchema("new", Type.FLOAT, ""), - new HCatFieldSchema("fields", 
Type.STRING, "")); - - client.updateTableSchema(dbName, tableName, newSchema); - - assertArrayEquals(newSchema.toArray(), client.getTable(dbName, tableName).getCols().toArray()); - - client.dropDatabase(dbName, false, HCatClient.DropDBMode.CASCADE); - } - catch (Exception exception) { - LOG.error("Unexpected exception.", exception); - assertTrue("Unexpected exception: " + exception.getMessage(), false); - } - } - - @Test - public void testObjectNotFoundException() throws Exception { - try { - - HCatClient client = HCatClient.create(new Configuration(hcatConf)); - String dbName = "testObjectNotFoundException_DBName"; - String tableName = "testObjectNotFoundException_TableName"; - client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE); - - try { // Test that fetching a non-existent db-name yields ObjectNotFound. - client.getDatabase(dbName); - assertTrue("Expected ObjectNotFoundException.", false); - } catch(Exception exception) { - LOG.info("Got exception: ", exception); - assertTrue("Expected ObjectNotFoundException. Got:" + exception.getClass(), - exception instanceof ObjectNotFoundException); - } - - client.createDatabase(HCatCreateDBDesc.create(dbName).build()); - - try { // Test that fetching a non-existent table-name yields ObjectNotFound. - client.getTable(dbName, tableName); - assertTrue("Expected ObjectNotFoundException.", false); - } catch(Exception exception) { - LOG.info("Got exception: ", exception); - assertTrue("Expected ObjectNotFoundException. 
Got:" + exception.getClass(), - exception instanceof ObjectNotFoundException); - } - - String partitionColumn = "part"; - - List columns = Arrays.asList(new HCatFieldSchema("col", Type.STRING, "")); - ArrayList partitionColumns = new ArrayList( - Arrays.asList(new HCatFieldSchema(partitionColumn, Type.STRING, ""))); - client.createTable(HCatCreateTableDesc.create(dbName, tableName, columns) - .partCols(partitionColumns) - .build()); - - Map partitionSpec = new HashMap(); - partitionSpec.put(partitionColumn, "foobar"); - try { // Test that fetching a non-existent partition yields ObjectNotFound. - client.getPartition(dbName, tableName, partitionSpec); - assertTrue("Expected ObjectNotFoundException.", false); - } catch(Exception exception) { - LOG.info("Got exception: ", exception); - assertTrue("Expected ObjectNotFoundException. Got:" + exception.getClass(), - exception instanceof ObjectNotFoundException); - } - - client.addPartition(HCatAddPartitionDesc.create(dbName, tableName, "", partitionSpec).build()); - - // Test that listPartitionsByFilter() returns an empty-set, if the filter selects no partitions. - assertEquals("Expected empty set of partitions.", - 0, client.listPartitionsByFilter(dbName, tableName, partitionColumn + " < 'foobar'").size()); - - try { // Test that listPartitionsByFilter() throws HCatException if the partition-key is incorrect. - partitionSpec.put("NonExistentKey", "foobar"); - client.getPartition(dbName, tableName, partitionSpec); - assertTrue("Expected HCatException.", false); - } catch(Exception exception) { - LOG.info("Got exception: ", exception); - assertTrue("Expected HCatException. Got:" + exception.getClass(), - exception instanceof HCatException); - assertFalse("Did not expect ObjectNotFoundException.", exception instanceof ObjectNotFoundException); - } - - } - catch (Throwable t) { - LOG.error("Unexpected exception!", t); - assertTrue("Unexpected exception! 
" + t.getMessage(), false); - } - } - - @Test - public void testGetMessageBusTopicName() throws Exception { - try { - HCatClient client = HCatClient.create(new Configuration(hcatConf)); - String dbName = "testGetMessageBusTopicName_DBName"; - String tableName = "testGetMessageBusTopicName_TableName"; - client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE); - client.createDatabase(HCatCreateDBDesc.create(dbName).build()); - String messageBusTopicName = "MY.topic.name"; - Map tableProperties = new HashMap(1); - tableProperties.put(HCatConstants.HCAT_MSGBUS_TOPIC_NAME, messageBusTopicName); - client.createTable(HCatCreateTableDesc.create(dbName, tableName, Arrays.asList(new HCatFieldSchema("foo", Type.STRING, ""))).tblProps(tableProperties).build()); - - assertEquals("MessageBus topic-name doesn't match!", messageBusTopicName, client.getMessageBusTopicName(dbName, tableName)); - client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE); - client.close(); - } - catch (Exception exception) { - LOG.error("Unexpected exception.", exception); - assertTrue("Unexpected exception:" + exception.getMessage(), false); - } - } - - @Test - public void testPartitionSchema() throws Exception { - try { - HCatClient client = HCatClient.create(new Configuration(hcatConf)); - final String dbName = "myDb"; - final String tableName = "myTable"; - - client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE); - - client.createDatabase(HCatCreateDBDesc.create(dbName).build()); - List columnSchema = Arrays.asList(new HCatFieldSchema("foo", Type.INT, ""), - new HCatFieldSchema("bar", Type.STRING, "")); - - List partitionSchema = Arrays.asList(new HCatFieldSchema("dt", Type.STRING, ""), - new HCatFieldSchema("grid", Type.STRING, "")); - - client.createTable(HCatCreateTableDesc.create(dbName, tableName, columnSchema).partCols(partitionSchema).build()); - - HCatTable table = client.getTable(dbName, tableName); - List partitionColumns = table.getPartCols(); - - 
assertArrayEquals("Didn't get expected partition-schema back from the HCatTable.", - partitionSchema.toArray(), partitionColumns.toArray()); - client.dropDatabase(dbName, false, HCatClient.DropDBMode.CASCADE); - } - catch (Exception unexpected) { - LOG.error("Unexpected exception!", unexpected); - assertTrue("Unexpected exception! " + unexpected.getMessage(), false); - } - } - - @Test - public void testGetPartitionsWithPartialSpec() throws Exception { - try { - HCatClient client = HCatClient.create(new Configuration(hcatConf)); - final String dbName = "myDb"; - final String tableName = "myTable"; - - client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE); - - client.createDatabase(HCatCreateDBDesc.create(dbName).build()); - List columnSchema = Arrays.asList(new HCatFieldSchema("foo", Type.INT, ""), - new HCatFieldSchema("bar", Type.STRING, "")); - - List partitionSchema = Arrays.asList(new HCatFieldSchema("dt", Type.STRING, ""), - new HCatFieldSchema("grid", Type.STRING, "")); - - client.createTable(HCatCreateTableDesc.create(dbName, tableName, columnSchema).partCols(new ArrayList(partitionSchema)).build()); - - Map partitionSpec = new HashMap(); - partitionSpec.put("grid", "AB"); - partitionSpec.put("dt", "2011_12_31"); - client.addPartition(HCatAddPartitionDesc.create(dbName, tableName, "", partitionSpec).build()); - partitionSpec.put("grid", "AB"); - partitionSpec.put("dt", "2012_01_01"); - client.addPartition(HCatAddPartitionDesc.create(dbName, tableName, "", partitionSpec).build()); - partitionSpec.put("dt", "2012_01_01"); - partitionSpec.put("grid", "OB"); - client.addPartition(HCatAddPartitionDesc.create(dbName, tableName, "", partitionSpec).build()); - partitionSpec.put("dt", "2012_01_01"); - partitionSpec.put("grid", "XB"); - client.addPartition(HCatAddPartitionDesc.create(dbName, tableName, "", partitionSpec).build()); - - Map partialPartitionSpec = new HashMap(); - partialPartitionSpec.put("dt", "2012_01_01"); - - List partitions = 
client.getPartitions(dbName, tableName, partialPartitionSpec); - assertEquals("Unexpected number of partitions.", 3, partitions.size()); - assertArrayEquals("Mismatched partition.", new String[]{"2012_01_01", "AB"}, partitions.get(0).getValues().toArray()); - assertArrayEquals("Mismatched partition.", new String[]{"2012_01_01", "OB"}, partitions.get(1).getValues().toArray()); - assertArrayEquals("Mismatched partition.", new String[]{"2012_01_01", "XB"}, partitions.get(2).getValues().toArray()); - - client.dropDatabase(dbName, false, HCatClient.DropDBMode.CASCADE); - } - catch (Exception unexpected) { - LOG.error("Unexpected exception!", unexpected); - assertTrue("Unexpected exception! " + unexpected.getMessage(), false); - } - } - - @Test - public void testDropPartitionsWithPartialSpec() throws Exception { - try { - HCatClient client = HCatClient.create(new Configuration(hcatConf)); - final String dbName = "myDb"; - final String tableName = "myTable"; - - client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE); - - client.createDatabase(HCatCreateDBDesc.create(dbName).build()); - List columnSchema = Arrays.asList(new HCatFieldSchema("foo", Type.INT, ""), - new HCatFieldSchema("bar", Type.STRING, "")); - - List partitionSchema = Arrays.asList(new HCatFieldSchema("dt", Type.STRING, ""), - new HCatFieldSchema("grid", Type.STRING, "")); - - client.createTable(HCatCreateTableDesc.create(dbName, tableName, columnSchema).partCols(new ArrayList(partitionSchema)).build()); - - Map partitionSpec = new HashMap(); - partitionSpec.put("grid", "AB"); - partitionSpec.put("dt", "2011_12_31"); - client.addPartition(HCatAddPartitionDesc.create(dbName, tableName, "", partitionSpec).build()); - partitionSpec.put("grid", "AB"); - partitionSpec.put("dt", "2012_01_01"); - client.addPartition(HCatAddPartitionDesc.create(dbName, tableName, "", partitionSpec).build()); - partitionSpec.put("dt", "2012_01_01"); - partitionSpec.put("grid", "OB"); - 
client.addPartition(HCatAddPartitionDesc.create(dbName, tableName, "", partitionSpec).build()); - partitionSpec.put("dt", "2012_01_01"); - partitionSpec.put("grid", "XB"); - client.addPartition(HCatAddPartitionDesc.create(dbName, tableName, "", partitionSpec).build()); - - Map partialPartitionSpec = new HashMap(); - partialPartitionSpec.put("dt", "2012_01_01"); - - client.dropPartitions(dbName, tableName, partialPartitionSpec, true); - - List partitions = client.getPartitions(dbName, tableName); - assertEquals("Unexpected number of partitions.", 1, partitions.size()); - assertArrayEquals("Mismatched partition.", new String[]{"2011_12_31", "AB"}, partitions.get(0).getValues().toArray()); - - client.dropDatabase(dbName, false, HCatClient.DropDBMode.CASCADE); - } - catch (Exception unexpected) { - LOG.error("Unexpected exception!", unexpected); - assertTrue("Unexpected exception! " + unexpected.getMessage(), false); - } - } - -} diff --git hcatalog/webhcat/java-client/src/test/java/org/apache/hive/hcatalog/api/TestHCatClient.java hcatalog/webhcat/java-client/src/test/java/org/apache/hive/hcatalog/api/TestHCatClient.java new file mode 100644 index 0000000..357856c --- /dev/null +++ hcatalog/webhcat/java-client/src/test/java/org/apache/hive/hcatalog/api/TestHCatClient.java @@ -0,0 +1,640 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.api; + +import java.math.BigInteger; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Random; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.HiveMetaStore; +import org.apache.hadoop.hive.metastore.api.PartitionEventType; +import org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat; +import org.apache.hadoop.hive.ql.io.RCFileInputFormat; +import org.apache.hadoop.hive.ql.io.RCFileOutputFormat; +import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe; +import org.apache.hadoop.mapred.TextInputFormat; +import org.apache.hive.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzer; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import org.apache.hive.hcatalog.common.HCatConstants; +import org.apache.hive.hcatalog.common.HCatException; +import org.apache.hive.hcatalog.data.schema.HCatFieldSchema; +import org.apache.hive.hcatalog.data.schema.HCatFieldSchema.Type; +import org.apache.hive.hcatalog.NoExitSecurityManager; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.assertArrayEquals; + +public class TestHCatClient { + private static final Logger LOG = 
LoggerFactory.getLogger(TestHCatClient.class); + private static final String msPort = "20101"; + private static HiveConf hcatConf; + private static SecurityManager securityManager; + + private static class RunMS implements Runnable { + + @Override + public void run() { + try { + HiveMetaStore.main(new String[]{"-v", "-p", msPort}); + } catch (Throwable t) { + LOG.error("Exiting. Got exception from metastore: ", t); + } + } + } + + @AfterClass + public static void tearDown() throws Exception { + LOG.info("Shutting down metastore."); + System.setSecurityManager(securityManager); + } + + @BeforeClass + public static void startMetaStoreServer() throws Exception { + + Thread t = new Thread(new RunMS()); + t.start(); + Thread.sleep(40000); + + securityManager = System.getSecurityManager(); + System.setSecurityManager(new NoExitSecurityManager()); + hcatConf = new HiveConf(TestHCatClient.class); + hcatConf.set("hive.metastore.local", "false"); + hcatConf.setVar(HiveConf.ConfVars.METASTOREURIS, "thrift://localhost:" + + msPort); + hcatConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES, 3); + hcatConf.set(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname, + HCatSemanticAnalyzer.class.getName()); + hcatConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); + hcatConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); + hcatConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, + "false"); + System.setProperty(HiveConf.ConfVars.PREEXECHOOKS.varname, " "); + System.setProperty(HiveConf.ConfVars.POSTEXECHOOKS.varname, " "); + } + + @Test + public void testBasicDDLCommands() throws Exception { + String db = "testdb"; + String tableOne = "testTable1"; + String tableTwo = "testTable2"; + HCatClient client = HCatClient.create(new Configuration(hcatConf)); + client.dropDatabase(db, true, HCatClient.DropDBMode.CASCADE); + + HCatCreateDBDesc dbDesc = HCatCreateDBDesc.create(db).ifNotExists(false) + .build(); + client.createDatabase(dbDesc); + List dbNames = 
client.listDatabaseNamesByPattern("*"); + assertTrue(dbNames.contains("default")); + assertTrue(dbNames.contains(db)); + + HCatDatabase testDb = client.getDatabase(db); + assertTrue(testDb.getComment() == null); + assertTrue(testDb.getProperties().size() == 0); + String warehouseDir = System + .getProperty(ConfVars.METASTOREWAREHOUSE.varname, "/user/hive/warehouse"); + String expectedDir = warehouseDir.replaceAll("\\\\", "/"); + if (!expectedDir.startsWith("/")) { + expectedDir = "/" + expectedDir; + } + assertTrue(testDb.getLocation().equals( + "file:" + expectedDir + "/" + db + ".db")); + ArrayList cols = new ArrayList(); + cols.add(new HCatFieldSchema("id", Type.INT, "id comment")); + cols.add(new HCatFieldSchema("value", Type.STRING, "value comment")); + HCatCreateTableDesc tableDesc = HCatCreateTableDesc + .create(db, tableOne, cols).fileFormat("rcfile").build(); + client.createTable(tableDesc); + HCatTable table1 = client.getTable(db, tableOne); + assertTrue(table1.getInputFileFormat().equalsIgnoreCase( + RCFileInputFormat.class.getName())); + assertTrue(table1.getOutputFileFormat().equalsIgnoreCase( + RCFileOutputFormat.class.getName())); + assertTrue(table1.getSerdeLib().equalsIgnoreCase( + ColumnarSerDe.class.getName())); + assertTrue(table1.getCols().equals(cols)); + // Since "ifexists" was not set to true, trying to create the same table + // again + // will result in an exception. 
+ try { + client.createTable(tableDesc); + } catch (HCatException e) { + assertTrue(e.getMessage().contains( + "AlreadyExistsException while creating table.")); + } + + client.dropTable(db, tableOne, true); + HCatCreateTableDesc tableDesc2 = HCatCreateTableDesc.create(db, + tableTwo, cols).build(); + client.createTable(tableDesc2); + HCatTable table2 = client.getTable(db, tableTwo); + assertTrue(table2.getInputFileFormat().equalsIgnoreCase( + TextInputFormat.class.getName())); + assertTrue(table2.getOutputFileFormat().equalsIgnoreCase( + IgnoreKeyTextOutputFormat.class.getName())); + assertTrue(table2.getLocation().equalsIgnoreCase( + "file:" + expectedDir + "/" + db + ".db/" + tableTwo)); + client.close(); + } + + @Test + public void testPartitionsHCatClientImpl() throws Exception { + HCatClient client = HCatClient.create(new Configuration(hcatConf)); + String dbName = "ptnDB"; + String tableName = "pageView"; + client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE); + + HCatCreateDBDesc dbDesc = HCatCreateDBDesc.create(dbName) + .ifNotExists(true).build(); + client.createDatabase(dbDesc); + ArrayList cols = new ArrayList(); + cols.add(new HCatFieldSchema("userid", Type.INT, "id columns")); + cols.add(new HCatFieldSchema("viewtime", Type.BIGINT, + "view time columns")); + cols.add(new HCatFieldSchema("pageurl", Type.STRING, "")); + cols.add(new HCatFieldSchema("ip", Type.STRING, + "IP Address of the User")); + + ArrayList ptnCols = new ArrayList(); + ptnCols.add(new HCatFieldSchema("dt", Type.STRING, "date column")); + ptnCols.add(new HCatFieldSchema("country", Type.STRING, + "country column")); + HCatCreateTableDesc tableDesc = HCatCreateTableDesc + .create(dbName, tableName, cols).fileFormat("sequencefile") + .partCols(ptnCols).build(); + client.createTable(tableDesc); + + Map firstPtn = new HashMap(); + firstPtn.put("dt", "04/30/2012"); + firstPtn.put("country", "usa"); + HCatAddPartitionDesc addPtn = HCatAddPartitionDesc.create(dbName, + tableName, 
null, firstPtn).build(); + client.addPartition(addPtn); + + Map secondPtn = new HashMap(); + secondPtn.put("dt", "04/12/2012"); + secondPtn.put("country", "brazil"); + HCatAddPartitionDesc addPtn2 = HCatAddPartitionDesc.create(dbName, + tableName, null, secondPtn).build(); + client.addPartition(addPtn2); + + Map thirdPtn = new HashMap(); + thirdPtn.put("dt", "04/13/2012"); + thirdPtn.put("country", "argentina"); + HCatAddPartitionDesc addPtn3 = HCatAddPartitionDesc.create(dbName, + tableName, null, thirdPtn).build(); + client.addPartition(addPtn3); + + List ptnList = client.listPartitionsByFilter(dbName, + tableName, null); + assertTrue(ptnList.size() == 3); + + HCatPartition ptn = client.getPartition(dbName, tableName, firstPtn); + assertTrue(ptn != null); + + client.dropPartitions(dbName, tableName, firstPtn, true); + ptnList = client.listPartitionsByFilter(dbName, + tableName, null); + assertTrue(ptnList.size() == 2); + + List ptnListTwo = client.listPartitionsByFilter(dbName, + tableName, "country = \"argentina\""); + assertTrue(ptnListTwo.size() == 1); + + client.markPartitionForEvent(dbName, tableName, thirdPtn, + PartitionEventType.LOAD_DONE); + boolean isMarked = client.isPartitionMarkedForEvent(dbName, tableName, + thirdPtn, PartitionEventType.LOAD_DONE); + assertTrue(isMarked); + client.close(); + } + + @Test + public void testDatabaseLocation() throws Exception { + HCatClient client = HCatClient.create(new Configuration(hcatConf)); + String dbName = "locationDB"; + client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE); + + HCatCreateDBDesc dbDesc = HCatCreateDBDesc.create(dbName) + .ifNotExists(true).location("/tmp/" + dbName).build(); + client.createDatabase(dbDesc); + HCatDatabase newDB = client.getDatabase(dbName); + assertTrue(newDB.getLocation().equalsIgnoreCase("file:/tmp/" + dbName)); + client.close(); + } + + @Test + public void testCreateTableLike() throws Exception { + HCatClient client = HCatClient.create(new 
Configuration(hcatConf)); + String tableName = "tableone"; + String cloneTable = "tabletwo"; + client.dropTable(null, tableName, true); + client.dropTable(null, cloneTable, true); + + ArrayList cols = new ArrayList(); + cols.add(new HCatFieldSchema("id", Type.INT, "id columns")); + cols.add(new HCatFieldSchema("value", Type.STRING, "id columns")); + HCatCreateTableDesc tableDesc = HCatCreateTableDesc + .create(null, tableName, cols).fileFormat("rcfile").build(); + client.createTable(tableDesc); + // create a new table similar to previous one. + client.createTableLike(null, tableName, cloneTable, true, false, null); + List tables = client.listTableNamesByPattern(null, "table*"); + assertTrue(tables.size() == 2); + client.close(); + } + + @Test + public void testRenameTable() throws Exception { + HCatClient client = HCatClient.create(new Configuration(hcatConf)); + String tableName = "temptable"; + String newName = "mytable"; + client.dropTable(null, tableName, true); + client.dropTable(null, newName, true); + ArrayList cols = new ArrayList(); + cols.add(new HCatFieldSchema("id", Type.INT, "id columns")); + cols.add(new HCatFieldSchema("value", Type.STRING, "id columns")); + HCatCreateTableDesc tableDesc = HCatCreateTableDesc + .create(null, tableName, cols).fileFormat("rcfile").build(); + client.createTable(tableDesc); + client.renameTable(null, tableName, newName); + try { + client.getTable(null, tableName); + } catch (HCatException exp) { + assertTrue("Unexpected exception message: " + exp.getMessage(), + exp.getMessage().contains("NoSuchObjectException while fetching table")); + } + HCatTable newTable = client.getTable(null, newName); + assertTrue(newTable != null); + assertTrue(newTable.getTableName().equals(newName)); + client.close(); + } + + @Test + public void testTransportFailure() throws Exception { + HCatClient client = HCatClient.create(new Configuration(hcatConf)); + boolean isExceptionCaught = false; + // Table creation with a long table name causes 
ConnectionFailureException + final String tableName = "Temptable" + new BigInteger(200, new Random()).toString(2); + + ArrayList cols = new ArrayList(); + cols.add(new HCatFieldSchema("id", Type.INT, "id columns")); + cols.add(new HCatFieldSchema("value", Type.STRING, "id columns")); + try { + HCatCreateTableDesc tableDesc = HCatCreateTableDesc + .create(null, tableName, cols).fileFormat("rcfile").build(); + client.createTable(tableDesc); + } catch (Exception exp) { + isExceptionCaught = true; + assertEquals("Unexpected exception type.", HCatException.class, exp.getClass()); + // The connection was closed, so create a new one. + client = HCatClient.create(new Configuration(hcatConf)); + String newName = "goodTable"; + client.dropTable(null, newName, true); + HCatCreateTableDesc tableDesc2 = HCatCreateTableDesc + .create(null, newName, cols).fileFormat("rcfile").build(); + client.createTable(tableDesc2); + HCatTable newTable = client.getTable(null, newName); + assertTrue(newTable != null); + assertTrue(newTable.getTableName().equalsIgnoreCase(newName)); + + } finally { + client.close(); + assertTrue("The expected exception was never thrown.", isExceptionCaught); + } + } + + @Test + public void testOtherFailure() throws Exception { + HCatClient client = HCatClient.create(new Configuration(hcatConf)); + String tableName = "Temptable"; + boolean isExceptionCaught = false; + client.dropTable(null, tableName, true); + ArrayList cols = new ArrayList(); + cols.add(new HCatFieldSchema("id", Type.INT, "id columns")); + cols.add(new HCatFieldSchema("value", Type.STRING, "id columns")); + try { + HCatCreateTableDesc tableDesc = HCatCreateTableDesc + .create(null, tableName, cols).fileFormat("rcfile").build(); + client.createTable(tableDesc); + // The DB foo is non-existent. 
+ client.getTable("foo", tableName); + } catch (Exception exp) { + isExceptionCaught = true; + assertTrue(exp instanceof HCatException); + String newName = "goodTable"; + client.dropTable(null, newName, true); + HCatCreateTableDesc tableDesc2 = HCatCreateTableDesc + .create(null, newName, cols).fileFormat("rcfile").build(); + client.createTable(tableDesc2); + HCatTable newTable = client.getTable(null, newName); + assertTrue(newTable != null); + assertTrue(newTable.getTableName().equalsIgnoreCase(newName)); + } finally { + client.close(); + assertTrue("The expected exception was never thrown.", isExceptionCaught); + } + } + + @Test + public void testDropTableException() throws Exception { + HCatClient client = HCatClient.create(new Configuration(hcatConf)); + String tableName = "tableToBeDropped"; + boolean isExceptionCaught = false; + client.dropTable(null, tableName, true); + try { + client.dropTable(null, tableName, false); + } catch (Exception exp) { + isExceptionCaught = true; + assertTrue(exp instanceof HCatException); + LOG.info("Drop Table Exception: " + exp.getCause()); + } finally { + client.close(); + assertTrue("The expected exception was never thrown.", isExceptionCaught); + } + } + + @Test + public void testUpdateTableSchema() throws Exception { + try { + HCatClient client = HCatClient.create(new Configuration(hcatConf)); + final String dbName = "testUpdateTableSchema_DBName"; + final String tableName = "testUpdateTableSchema_TableName"; + + client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE); + + client.createDatabase(HCatCreateDBDesc.create(dbName).build()); + List oldSchema = Arrays.asList(new HCatFieldSchema("foo", Type.INT, ""), + new HCatFieldSchema("bar", Type.STRING, "")); + client.createTable(HCatCreateTableDesc.create(dbName, tableName, oldSchema).build()); + + List newSchema = Arrays.asList(new HCatFieldSchema("completely", Type.DOUBLE, ""), + new HCatFieldSchema("new", Type.FLOAT, ""), + new HCatFieldSchema("fields", 
Type.STRING, "")); + + client.updateTableSchema(dbName, tableName, newSchema); + + assertArrayEquals(newSchema.toArray(), client.getTable(dbName, tableName).getCols().toArray()); + + client.dropDatabase(dbName, false, HCatClient.DropDBMode.CASCADE); + } + catch (Exception exception) { + LOG.error("Unexpected exception.", exception); + assertTrue("Unexpected exception: " + exception.getMessage(), false); + } + } + + @Test + public void testObjectNotFoundException() throws Exception { + try { + + HCatClient client = HCatClient.create(new Configuration(hcatConf)); + String dbName = "testObjectNotFoundException_DBName"; + String tableName = "testObjectNotFoundException_TableName"; + client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE); + + try { // Test that fetching a non-existent db-name yields ObjectNotFound. + client.getDatabase(dbName); + assertTrue("Expected ObjectNotFoundException.", false); + } catch(Exception exception) { + LOG.info("Got exception: ", exception); + assertTrue("Expected ObjectNotFoundException. Got:" + exception.getClass(), + exception instanceof ObjectNotFoundException); + } + + client.createDatabase(HCatCreateDBDesc.create(dbName).build()); + + try { // Test that fetching a non-existent table-name yields ObjectNotFound. + client.getTable(dbName, tableName); + assertTrue("Expected ObjectNotFoundException.", false); + } catch(Exception exception) { + LOG.info("Got exception: ", exception); + assertTrue("Expected ObjectNotFoundException. 
Got:" + exception.getClass(), + exception instanceof ObjectNotFoundException); + } + + String partitionColumn = "part"; + + List columns = Arrays.asList(new HCatFieldSchema("col", Type.STRING, "")); + ArrayList partitionColumns = new ArrayList( + Arrays.asList(new HCatFieldSchema(partitionColumn, Type.STRING, ""))); + client.createTable(HCatCreateTableDesc.create(dbName, tableName, columns) + .partCols(partitionColumns) + .build()); + + Map partitionSpec = new HashMap(); + partitionSpec.put(partitionColumn, "foobar"); + try { // Test that fetching a non-existent partition yields ObjectNotFound. + client.getPartition(dbName, tableName, partitionSpec); + assertTrue("Expected ObjectNotFoundException.", false); + } catch(Exception exception) { + LOG.info("Got exception: ", exception); + assertTrue("Expected ObjectNotFoundException. Got:" + exception.getClass(), + exception instanceof ObjectNotFoundException); + } + + client.addPartition(HCatAddPartitionDesc.create(dbName, tableName, "", partitionSpec).build()); + + // Test that listPartitionsByFilter() returns an empty-set, if the filter selects no partitions. + assertEquals("Expected empty set of partitions.", + 0, client.listPartitionsByFilter(dbName, tableName, partitionColumn + " < 'foobar'").size()); + + try { // Test that listPartitionsByFilter() throws HCatException if the partition-key is incorrect. + partitionSpec.put("NonExistentKey", "foobar"); + client.getPartition(dbName, tableName, partitionSpec); + assertTrue("Expected HCatException.", false); + } catch(Exception exception) { + LOG.info("Got exception: ", exception); + assertTrue("Expected HCatException. Got:" + exception.getClass(), + exception instanceof HCatException); + assertFalse("Did not expect ObjectNotFoundException.", exception instanceof ObjectNotFoundException); + } + + } + catch (Throwable t) { + LOG.error("Unexpected exception!", t); + assertTrue("Unexpected exception! 
" + t.getMessage(), false); + } + } + + @Test + public void testGetMessageBusTopicName() throws Exception { + try { + HCatClient client = HCatClient.create(new Configuration(hcatConf)); + String dbName = "testGetMessageBusTopicName_DBName"; + String tableName = "testGetMessageBusTopicName_TableName"; + client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE); + client.createDatabase(HCatCreateDBDesc.create(dbName).build()); + String messageBusTopicName = "MY.topic.name"; + Map tableProperties = new HashMap(1); + tableProperties.put(HCatConstants.HCAT_MSGBUS_TOPIC_NAME, messageBusTopicName); + client.createTable(HCatCreateTableDesc.create(dbName, tableName, Arrays.asList(new HCatFieldSchema("foo", Type.STRING, ""))).tblProps(tableProperties).build()); + + assertEquals("MessageBus topic-name doesn't match!", messageBusTopicName, client.getMessageBusTopicName(dbName, tableName)); + client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE); + client.close(); + } + catch (Exception exception) { + LOG.error("Unexpected exception.", exception); + assertTrue("Unexpected exception:" + exception.getMessage(), false); + } + } + + @Test + public void testPartitionSchema() throws Exception { + try { + HCatClient client = HCatClient.create(new Configuration(hcatConf)); + final String dbName = "myDb"; + final String tableName = "myTable"; + + client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE); + + client.createDatabase(HCatCreateDBDesc.create(dbName).build()); + List columnSchema = Arrays.asList(new HCatFieldSchema("foo", Type.INT, ""), + new HCatFieldSchema("bar", Type.STRING, "")); + + List partitionSchema = Arrays.asList(new HCatFieldSchema("dt", Type.STRING, ""), + new HCatFieldSchema("grid", Type.STRING, "")); + + client.createTable(HCatCreateTableDesc.create(dbName, tableName, columnSchema).partCols(partitionSchema).build()); + + HCatTable table = client.getTable(dbName, tableName); + List partitionColumns = table.getPartCols(); + + 
assertArrayEquals("Didn't get expected partition-schema back from the HCatTable.", + partitionSchema.toArray(), partitionColumns.toArray()); + client.dropDatabase(dbName, false, HCatClient.DropDBMode.CASCADE); + } + catch (Exception unexpected) { + LOG.error("Unexpected exception!", unexpected); + assertTrue("Unexpected exception! " + unexpected.getMessage(), false); + } + } + + @Test + public void testGetPartitionsWithPartialSpec() throws Exception { + try { + HCatClient client = HCatClient.create(new Configuration(hcatConf)); + final String dbName = "myDb"; + final String tableName = "myTable"; + + client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE); + + client.createDatabase(HCatCreateDBDesc.create(dbName).build()); + List columnSchema = Arrays.asList(new HCatFieldSchema("foo", Type.INT, ""), + new HCatFieldSchema("bar", Type.STRING, "")); + + List partitionSchema = Arrays.asList(new HCatFieldSchema("dt", Type.STRING, ""), + new HCatFieldSchema("grid", Type.STRING, "")); + + client.createTable(HCatCreateTableDesc.create(dbName, tableName, columnSchema).partCols(new ArrayList(partitionSchema)).build()); + + Map partitionSpec = new HashMap(); + partitionSpec.put("grid", "AB"); + partitionSpec.put("dt", "2011_12_31"); + client.addPartition(HCatAddPartitionDesc.create(dbName, tableName, "", partitionSpec).build()); + partitionSpec.put("grid", "AB"); + partitionSpec.put("dt", "2012_01_01"); + client.addPartition(HCatAddPartitionDesc.create(dbName, tableName, "", partitionSpec).build()); + partitionSpec.put("dt", "2012_01_01"); + partitionSpec.put("grid", "OB"); + client.addPartition(HCatAddPartitionDesc.create(dbName, tableName, "", partitionSpec).build()); + partitionSpec.put("dt", "2012_01_01"); + partitionSpec.put("grid", "XB"); + client.addPartition(HCatAddPartitionDesc.create(dbName, tableName, "", partitionSpec).build()); + + Map partialPartitionSpec = new HashMap(); + partialPartitionSpec.put("dt", "2012_01_01"); + + List partitions = 
client.getPartitions(dbName, tableName, partialPartitionSpec); + assertEquals("Unexpected number of partitions.", 3, partitions.size()); + assertArrayEquals("Mismatched partition.", new String[]{"2012_01_01", "AB"}, partitions.get(0).getValues().toArray()); + assertArrayEquals("Mismatched partition.", new String[]{"2012_01_01", "OB"}, partitions.get(1).getValues().toArray()); + assertArrayEquals("Mismatched partition.", new String[]{"2012_01_01", "XB"}, partitions.get(2).getValues().toArray()); + + client.dropDatabase(dbName, false, HCatClient.DropDBMode.CASCADE); + } + catch (Exception unexpected) { + LOG.error("Unexpected exception!", unexpected); + assertTrue("Unexpected exception! " + unexpected.getMessage(), false); + } + } + + @Test + public void testDropPartitionsWithPartialSpec() throws Exception { + try { + HCatClient client = HCatClient.create(new Configuration(hcatConf)); + final String dbName = "myDb"; + final String tableName = "myTable"; + + client.dropDatabase(dbName, true, HCatClient.DropDBMode.CASCADE); + + client.createDatabase(HCatCreateDBDesc.create(dbName).build()); + List columnSchema = Arrays.asList(new HCatFieldSchema("foo", Type.INT, ""), + new HCatFieldSchema("bar", Type.STRING, "")); + + List partitionSchema = Arrays.asList(new HCatFieldSchema("dt", Type.STRING, ""), + new HCatFieldSchema("grid", Type.STRING, "")); + + client.createTable(HCatCreateTableDesc.create(dbName, tableName, columnSchema).partCols(new ArrayList(partitionSchema)).build()); + + Map partitionSpec = new HashMap(); + partitionSpec.put("grid", "AB"); + partitionSpec.put("dt", "2011_12_31"); + client.addPartition(HCatAddPartitionDesc.create(dbName, tableName, "", partitionSpec).build()); + partitionSpec.put("grid", "AB"); + partitionSpec.put("dt", "2012_01_01"); + client.addPartition(HCatAddPartitionDesc.create(dbName, tableName, "", partitionSpec).build()); + partitionSpec.put("dt", "2012_01_01"); + partitionSpec.put("grid", "OB"); + 
client.addPartition(HCatAddPartitionDesc.create(dbName, tableName, "", partitionSpec).build()); + partitionSpec.put("dt", "2012_01_01"); + partitionSpec.put("grid", "XB"); + client.addPartition(HCatAddPartitionDesc.create(dbName, tableName, "", partitionSpec).build()); + + Map partialPartitionSpec = new HashMap(); + partialPartitionSpec.put("dt", "2012_01_01"); + + client.dropPartitions(dbName, tableName, partialPartitionSpec, true); + + List partitions = client.getPartitions(dbName, tableName); + assertEquals("Unexpected number of partitions.", 1, partitions.size()); + assertArrayEquals("Mismatched partition.", new String[]{"2011_12_31", "AB"}, partitions.get(0).getValues().toArray()); + + client.dropDatabase(dbName, false, HCatClient.DropDBMode.CASCADE); + } + catch (Exception unexpected) { + LOG.error("Unexpected exception!", unexpected); + assertTrue("Unexpected exception! " + unexpected.getMessage(), false); + } + } + +} diff --git hcatalog/webhcat/svr/pom.xml hcatalog/webhcat/svr/pom.xml index d34724e..71ff8cc 100644 --- hcatalog/webhcat/svr/pom.xml +++ hcatalog/webhcat/svr/pom.xml @@ -22,14 +22,13 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"> - org.apache.hcatalog + org.apache.hive.hcatalog hcatalog 0.12.0-SNAPSHOT ../../pom.xml 4.0.0 - org.apache.hcatalog webhcat jar webhcat @@ -53,7 +52,7 @@ --> - org.apache.hcatalog + org.apache.hive.hcatalog hcatalog-core ${hcatalog.version} provided diff --git hcatalog/webhcat/svr/src/main/bin/webhcat_server.sh hcatalog/webhcat/svr/src/main/bin/webhcat_server.sh index 24611c4..098ef08 100644 --- hcatalog/webhcat/svr/src/main/bin/webhcat_server.sh +++ hcatalog/webhcat/svr/src/main/bin/webhcat_server.sh @@ -223,7 +223,7 @@ fi export HADOOP_USER_CLASSPATH_FIRST=true export HADOOP_OPTS="${HADOOP_OPTS} -Dwebhcat.log.dir=$WEBHCAT_LOG_DIR -Dlog4j.configuration=$WEBHCAT_LOG4J" -start_cmd="$HADOOP_PREFIX/bin/hadoop jar $JAR org.apache.hcatalog.templeton.Main " 
+start_cmd="$HADOOP_PREFIX/bin/hadoop jar $JAR org.apache.hive.hcatalog.templeton.Main " cmd=$1 diff --git hcatalog/webhcat/svr/src/main/config/webhcat-default.xml hcatalog/webhcat/svr/src/main/config/webhcat-default.xml index d60d806..a27419a 100644 --- hcatalog/webhcat/svr/src/main/config/webhcat-default.xml +++ hcatalog/webhcat/svr/src/main/config/webhcat-default.xml @@ -191,7 +191,7 @@ templeton.storage.class - org.apache.hcatalog.templeton.tool.HDFSStorage + org.apache.hive.hcatalog.templeton.tool.HDFSStorage The class to use as storage diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/AppConfig.java hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/AppConfig.java deleted file mode 100644 index 9e15698..0000000 --- hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/AppConfig.java +++ /dev/null @@ -1,223 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.hcatalog.templeton; - -import java.io.File; -import java.net.URL; -import java.util.Map; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.util.VersionInfo; -import org.apache.hcatalog.templeton.tool.JobState; -import org.apache.hcatalog.templeton.tool.ZooKeeperCleanup; -import org.apache.hcatalog.templeton.tool.ZooKeeperStorage; - -/** - * The configuration for Templeton. This merges the normal Hadoop - * configuration with the Templeton specific variables. - * - * The Templeton configuration variables are described in - * templeton-default.xml - * - * The Templeton specific configuration is split into two layers - * - * 1. webhcat-default.xml - All the configuration variables that - * Templeton needs. These are the defaults that ship with the app - * and should only be changed be the app developers. - * - * 2. webhcat-site.xml - The (possibly empty) configuration that the - * system administrator can set variables for their Hadoop cluster. - * - * The configuration files are loaded in this order with later files - * overriding earlier ones. - * - * To find the configuration files, we first attempt to load a file - * from the CLASSPATH and then look in the directory specified in the - * TEMPLETON_HOME environment variable. - * - * In addition the configuration files may access the special env - * variable env for all environment variables. For example, the - * hadoop executable could be specified using: - *

- *      ${env.HADOOP_PREFIX}/bin/hadoop
- *
- */ -public class AppConfig extends Configuration { - public static final String[] HADOOP_CONF_FILENAMES = { - "core-default.xml", "core-site.xml", "mapred-default.xml", "mapred-site.xml", "hdfs-site.xml" - }; - - public static final String[] HADOOP_PREFIX_VARS = { - "HADOOP_PREFIX", "HADOOP_HOME" - }; - - public static final String TEMPLETON_HOME_VAR = "TEMPLETON_HOME"; - - public static final String[] TEMPLETON_CONF_FILENAMES = { - "webhcat-default.xml", - "webhcat-site.xml" - }; - - public static final String PORT = "templeton.port"; - public static final String EXEC_ENCODING_NAME = "templeton.exec.encoding"; - public static final String EXEC_ENVS_NAME = "templeton.exec.envs"; - public static final String EXEC_MAX_BYTES_NAME = "templeton.exec.max-output-bytes"; - public static final String EXEC_MAX_PROCS_NAME = "templeton.exec.max-procs"; - public static final String EXEC_TIMEOUT_NAME = "templeton.exec.timeout"; - public static final String HADOOP_QUEUE_NAME = "templeton.hadoop.queue.name"; - public static final String HADOOP_NAME = "templeton.hadoop"; - public static final String HADOOP_CONF_DIR = "templeton.hadoop.conf.dir"; - public static final String HCAT_NAME = "templeton.hcat"; - public static final String HIVE_ARCHIVE_NAME = "templeton.hive.archive"; - public static final String HIVE_PATH_NAME = "templeton.hive.path"; - public static final String HIVE_PROPS_NAME = "templeton.hive.properties"; - public static final String LIB_JARS_NAME = "templeton.libjars"; - public static final String PIG_ARCHIVE_NAME = "templeton.pig.archive"; - public static final String PIG_PATH_NAME = "templeton.pig.path"; - public static final String STREAMING_JAR_NAME = "templeton.streaming.jar"; - public static final String TEMPLETON_JAR_NAME = "templeton.jar"; - public static final String OVERRIDE_JARS_NAME = "templeton.override.jars"; - public static final String OVERRIDE_JARS_ENABLED = "templeton.override.enabled"; - public static final String 
TEMPLETON_CONTROLLER_MR_CHILD_OPTS - = "templeton.controller.mr.child.opts"; - - public static final String KERBEROS_SECRET = "templeton.kerberos.secret"; - public static final String KERBEROS_PRINCIPAL = "templeton.kerberos.principal"; - public static final String KERBEROS_KEYTAB = "templeton.kerberos.keytab"; - - public static final String CALLBACK_INTERVAL_NAME - = "templeton.callback.retry.interval"; - public static final String CALLBACK_RETRY_NAME - = "templeton.callback.retry.attempts"; - - //Hadoop property names (set by templeton logic) - public static final String HADOOP_END_INTERVAL_NAME = "job.end.retry.interval"; - public static final String HADOOP_END_RETRY_NAME = "job.end.retry.attempts"; - public static final String HADOOP_END_URL_NAME = "job.end.notification.url"; - public static final String HADOOP_SPECULATIVE_NAME - = "mapred.map.tasks.speculative.execution"; - public static final String HADOOP_CHILD_JAVA_OPTS = "mapred.child.java.opts"; - public static final String UNIT_TEST_MODE = "templeton.unit.test.mode"; - - - private static final Log LOG = LogFactory.getLog(AppConfig.class); - - public AppConfig() { - init(); - LOG.info("Using Hadoop version " + VersionInfo.getVersion()); - } - - private void init() { - for (Map.Entry e : System.getenv().entrySet()) - set("env." + e.getKey(), e.getValue()); - - String templetonDir = getTempletonDir(); - for (String fname : TEMPLETON_CONF_FILENAMES) - if (! 
loadOneClasspathConfig(fname)) - loadOneFileConfig(templetonDir, fname); - - String hadoopConfDir = getHadoopConfDir(); - for (String fname : HADOOP_CONF_FILENAMES) - loadOneFileConfig(hadoopConfDir, fname); - ProxyUserSupport.processProxyuserConfig(this); - } - - public void startCleanup() { - JobState.getStorageInstance(this).startCleanup(this); - } - - public String getHadoopConfDir() { - return get(HADOOP_CONF_DIR); - } - - public static String getTempletonDir() { - return System.getenv(TEMPLETON_HOME_VAR); - } - - private boolean loadOneFileConfig(String dir, String fname) { - if (dir != null) { - File f = new File(dir, fname); - if (f.exists()) { - addResource(new Path(f.getAbsolutePath())); - LOG.debug("loaded config file " + f.getAbsolutePath()); - return true; - } - } - return false; - } - - private boolean loadOneClasspathConfig(String fname) { - URL x = getResource(fname); - if (x != null) { - addResource(x); - LOG.debug("loaded config from classpath " + x); - return true; - } - - return false; - } - - public String templetonJar() { return get(TEMPLETON_JAR_NAME); } - public String libJars() { return get(LIB_JARS_NAME); } - public String hadoopQueueName() { return get(HADOOP_QUEUE_NAME); } - public String clusterHadoop() { return get(HADOOP_NAME); } - public String clusterHcat() { return get(HCAT_NAME); } - public String pigPath() { return get(PIG_PATH_NAME); } - public String pigArchive() { return get(PIG_ARCHIVE_NAME); } - public String hivePath() { return get(HIVE_PATH_NAME); } - public String hiveArchive() { return get(HIVE_ARCHIVE_NAME); } - public String streamingJar() { return get(STREAMING_JAR_NAME); } - public String kerberosSecret() { return get(KERBEROS_SECRET); } - public String kerberosPrincipal(){ return get(KERBEROS_PRINCIPAL); } - public String kerberosKeytab() { return get(KERBEROS_KEYTAB); } - public String controllerMRChildOpts() { - return get(TEMPLETON_CONTROLLER_MR_CHILD_OPTS); - } - - - - public String[] overrideJars() { - if 
(getBoolean(OVERRIDE_JARS_ENABLED, true)) - return getStrings(OVERRIDE_JARS_NAME); - else - return null; - } - public String overrideJarsString() { - if (getBoolean(OVERRIDE_JARS_ENABLED, true)) - return get(OVERRIDE_JARS_NAME); - else - return null; - } - - public long zkCleanupInterval() { - return getLong(ZooKeeperCleanup.ZK_CLEANUP_INTERVAL, - (1000L * 60L * 60L * 12L)); - } - - public long zkMaxAge() { - return getLong(ZooKeeperCleanup.ZK_CLEANUP_MAX_AGE, - (1000L * 60L * 60L * 24L * 7L)); - } - - public String zkHosts() { return get(ZooKeeperStorage.ZK_HOSTS); } - public int zkSessionTimeout() { return getInt(ZooKeeperStorage.ZK_SESSION_TIMEOUT, 30000); } -} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/BadParam.java hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/BadParam.java deleted file mode 100644 index 50f0b75..0000000 --- hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/BadParam.java +++ /dev/null @@ -1,30 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.templeton; - -import org.eclipse.jetty.http.HttpStatus; - -/** - * Missing required or badly configured paramater. 
- */ -public class BadParam extends SimpleWebException { - public BadParam(String msg) { - super(HttpStatus.BAD_REQUEST_400, msg); - } -} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/BusyException.java hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/BusyException.java deleted file mode 100644 index 0ce15dd..0000000 --- hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/BusyException.java +++ /dev/null @@ -1,30 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.templeton; - -import org.eclipse.jetty.http.HttpStatus; - -/** - * Simple "we are busy, try again" exception. 
- */ -public class BusyException extends SimpleWebException { - public BusyException() { - super(HttpStatus.SERVICE_UNAVAILABLE_503, "Busy, please retry"); - } -} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/CallbackFailedException.java hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/CallbackFailedException.java deleted file mode 100644 index 7c6f10d..0000000 --- hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/CallbackFailedException.java +++ /dev/null @@ -1,30 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.templeton; - -import org.eclipse.jetty.http.HttpStatus; - -/** - * The callback failed when it tried to reach the callback URL. 
- */ -public class CallbackFailedException extends SimpleWebException { - public CallbackFailedException(String msg) { - super(HttpStatus.BAD_REQUEST_400, msg); - } -} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/CatchallExceptionMapper.java hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/CatchallExceptionMapper.java deleted file mode 100644 index aef0231..0000000 --- hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/CatchallExceptionMapper.java +++ /dev/null @@ -1,47 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.templeton; - -import javax.ws.rs.core.Response; -import javax.ws.rs.ext.ExceptionMapper; -import javax.ws.rs.ext.Provider; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.eclipse.jetty.http.HttpStatus; - -import com.sun.jersey.api.NotFoundException; - -/** - * Map all exceptions to the Jersey response. This lets us have nice - * results in the error body. 
- */ -@Provider -public class CatchallExceptionMapper - implements ExceptionMapper { - private static final Log LOG = LogFactory.getLog(CatchallExceptionMapper.class); - - public Response toResponse(Exception e) { - LOG.error(e.getMessage(), e); - if (e instanceof NotFoundException) { - return SimpleWebException.buildMessage(HttpStatus.NOT_FOUND_404, null, e.getMessage()); - } - return SimpleWebException.buildMessage(HttpStatus.INTERNAL_SERVER_ERROR_500, null, e.getMessage()); - } -} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/ColumnDesc.java hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/ColumnDesc.java deleted file mode 100644 index b13b370..0000000 --- hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/ColumnDesc.java +++ /dev/null @@ -1,60 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.templeton; - -import javax.xml.bind.annotation.XmlRootElement; - -/** - * A description of the column to create. 
- */ -@XmlRootElement -public class ColumnDesc extends GroupPermissionsDesc { - public String name; - public String type; - public String comment; - - public ColumnDesc() {} - - /** - * Create a new ColumnDesc - */ - public ColumnDesc(String name, String type, String comment) { - this.name = name; - this.type = type; - this.comment = comment; - } - - public String toString() { - return String.format("ColumnDesc(name=%s, type=%s, comment=%s)", - name, type, comment); - } - - public boolean equals(Object o) { - if (this == o) - return true; - if (! (o instanceof ColumnDesc)) - return false; - ColumnDesc that = (ColumnDesc) o; - return xequals(this.name, that.name) - && xequals(this.type, that.type) - && xequals(this.comment, that.comment) - && super.equals(that) - ; - } -} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/CompleteBean.java hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/CompleteBean.java deleted file mode 100644 index 6f2315b..0000000 --- hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/CompleteBean.java +++ /dev/null @@ -1,37 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.hcatalog.templeton; - -/** - * CompleteBean - The results of an CompleteDelegator run. - */ -public class CompleteBean { - public String status; - - public CompleteBean() {} - - /** - * Create a new CompleteBean - * - * @param status run status - */ - public CompleteBean(String status) { - this.status = status; - } -} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/CompleteDelegator.java hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/CompleteDelegator.java deleted file mode 100644 index 7662d17..0000000 --- hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/CompleteDelegator.java +++ /dev/null @@ -1,113 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.templeton; - -import java.io.IOException; -import java.net.URL; -import java.net.MalformedURLException; -import java.util.Date; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hcatalog.templeton.tool.JobState; -import org.apache.hcatalog.templeton.tool.TempletonUtils; - -/** - * Complete a job. 
This will run the callback if - * - * - the job is done - * - there is a callback - * - the callback has not yet been called - * - * There is a small chance for a race condition if two callers run - * this at the same time. That should never happen. - * - * We use a Hadoop config var to notify this class on the completion - * of a job. Hadoop will call use multiple times in the event of - * failure. Even if the failure is that the client callback failed. - * - * See LauncherDelegator for the HADOOP_END_RETRY* vars that are set. - */ -public class CompleteDelegator extends TempletonDelegator { - private static final Log LOG = LogFactory.getLog(CompleteDelegator.class); - - public CompleteDelegator(AppConfig appConf) { - super(appConf); - } - - public CompleteBean run(String id) - throws CallbackFailedException, IOException { - if (id == null) - acceptWithError("No jobid given"); - - JobState state = null; - try { - state = new JobState(id, Main.getAppConfigInstance()); - if (state.getCompleteStatus() == null) - failed("Job not yet complete", null); - - Long notified = state.getNotifiedTime(); - if (notified != null) - return acceptWithError("Callback already run on " - + new Date(notified.longValue())); - - String callback = state.getCallback(); - if (callback == null) - return new CompleteBean("No callback registered"); - - try { - doCallback(state.getId(), callback); - } catch (Exception e) { - failed("Callback failed " + callback + " for " + id, e); - } - - state.setNotifiedTime(System.currentTimeMillis()); - return new CompleteBean("Callback sent"); - } finally { - if (state != null) - state.close(); - } - } - - /** - * Call the callback url with the jobid to let them know it's - * finished. If the url has the string $jobId in it, it will be - * replaced with the completed jobid. 
- */ - public static void doCallback(String jobid, String url) - throws MalformedURLException, IOException { - if (url.contains("$jobId")) - url = url.replace("$jobId", jobid); - TempletonUtils.fetchUrl(new URL(url)); - } - - private void failed(String msg, Exception e) - throws CallbackFailedException { - if (e != null) - LOG.error(msg, e); - else - LOG.error(msg); - throw new CallbackFailedException(msg); - } - - private CompleteBean acceptWithError(String msg) { - LOG.error(msg); - return new CompleteBean(msg); - } -} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/DatabaseDesc.java hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/DatabaseDesc.java deleted file mode 100644 index 677d44f..0000000 --- hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/DatabaseDesc.java +++ /dev/null @@ -1,42 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.templeton; - -import java.util.Map; - -import javax.xml.bind.annotation.XmlRootElement; - -/** - * A description of the database to create. 
- */ -@XmlRootElement -public class DatabaseDesc extends GroupPermissionsDesc { - public boolean ifNotExists; - public String database; - public String comment; - public String location; - public Map properties; - - public DatabaseDesc() {} - - public String toString() { - return String.format("DatabaseDesc(database=%s, comment=%s, location=%s, " + - "properties=%s)", database, comment, location, properties); - } -} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/DeleteDelegator.java hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/DeleteDelegator.java deleted file mode 100644 index 990245a..0000000 --- hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/DeleteDelegator.java +++ /dev/null @@ -1,63 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.hcatalog.templeton; - -import java.io.IOException; - -import org.apache.hadoop.hive.shims.HadoopShims.WebHCatJTShim; -import org.apache.hadoop.hive.shims.ShimLoader; -import org.apache.hadoop.mapred.JobID; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hcatalog.templeton.tool.JobState; - -/** - * Delete a job - */ -public class DeleteDelegator extends TempletonDelegator { - public DeleteDelegator(AppConfig appConf) { - super(appConf); - } - - public QueueStatusBean run(String user, String id) - throws NotAuthorizedException, BadParam, IOException, InterruptedException - { - UserGroupInformation ugi = UgiFactory.getUgi(user); - WebHCatJTShim tracker = null; - JobState state = null; - try { - tracker = ShimLoader.getHadoopShims().getWebHCatShim(appConf, ugi); - JobID jobid = StatusDelegator.StringToJobID(id); - if (jobid == null) - throw new BadParam("Invalid jobid: " + id); - tracker.killJob(jobid); - state = new JobState(id, Main.getAppConfigInstance()); - String childid = state.getChildId(); - if (childid != null) - tracker.killJob(StatusDelegator.StringToJobID(childid)); - return StatusDelegator.makeStatus(tracker, jobid, state); - } catch (IllegalStateException e) { - throw new BadParam(e.getMessage()); - } finally { - if (tracker != null) - tracker.close(); - if (state != null) - state.close(); - } - } -} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/EnqueueBean.java hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/EnqueueBean.java deleted file mode 100644 index 26e0357..0000000 --- hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/EnqueueBean.java +++ /dev/null @@ -1,37 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.templeton; - -/** - * EnqueueBean - The results of a call that enqueues a Hadoop job. - */ -public class EnqueueBean { - public String id; - - public EnqueueBean() {} - - /** - * Create a new EnqueueBean. - * - * @param id job id - */ - public EnqueueBean(String id) { - this.id = id; - } -} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/ExecBean.java hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/ExecBean.java deleted file mode 100644 index 861af87..0000000 --- hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/ExecBean.java +++ /dev/null @@ -1,48 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.templeton; - -/** - * ExecBean - The results of an exec call. - */ -public class ExecBean { - public String stdout; - public String stderr; - public int exitcode; - - public ExecBean() {} - - /** - * Create a new ExecBean. - * - * @param stdout standard output of the the program. - * @param stderr error output of the the program. - * @param exitcode exit code of the program. - */ - public ExecBean(String stdout, String stderr, int exitcode) { - this.stdout = stdout; - this.stderr = stderr; - this.exitcode = exitcode; - } - - public String toString() { - return String.format("ExecBean(stdout=%s, stderr=%s, exitcode=%s)", - stdout, stderr, exitcode); - } -} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/ExecService.java hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/ExecService.java deleted file mode 100644 index 0fd679e..0000000 --- hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/ExecService.java +++ /dev/null @@ -1,35 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.hcatalog.templeton; - -import java.io.IOException; -import java.util.List; -import java.util.Map; - -import org.apache.commons.exec.ExecuteException; - -public interface ExecService { - public ExecBean run(String program, List args, - Map env) - throws NotAuthorizedException, BusyException, ExecuteException, IOException; - - public ExecBean runUnlimited(String program, List args, - Map env) - throws NotAuthorizedException, ExecuteException, IOException; -} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/ExecServiceImpl.java hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/ExecServiceImpl.java deleted file mode 100644 index 059fd28..0000000 --- hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/ExecServiceImpl.java +++ /dev/null @@ -1,194 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.hcatalog.templeton; - -import java.io.ByteArrayOutputStream; -import java.io.File; -import java.io.IOException; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.concurrent.Semaphore; - -import org.apache.commons.exec.CommandLine; -import org.apache.commons.exec.DefaultExecutor; -import org.apache.commons.exec.ExecuteException; -import org.apache.commons.exec.ExecuteWatchdog; -import org.apache.commons.exec.PumpStreamHandler; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; - -/** - * Execute a local program. This is a singleton service that will - * execute programs as non-privileged users on the local box. See - * ExecService.run and ExecService.runUnlimited for details. - */ -public class ExecServiceImpl implements ExecService { - private static final Log LOG = LogFactory.getLog(ExecServiceImpl.class); - private static AppConfig appConf = Main.getAppConfigInstance(); - - private static volatile ExecServiceImpl theSingleton; - - /** - * Retrieve the singleton. - */ - public static synchronized ExecServiceImpl getInstance() { - if (theSingleton == null) { - theSingleton = new ExecServiceImpl(); - } - return theSingleton; - } - - private Semaphore avail; - - private ExecServiceImpl() { - avail = new Semaphore(appConf.getInt(AppConfig.EXEC_MAX_PROCS_NAME, 16)); - } - - /** - * Run the program synchronously as the given user. We rate limit - * the number of processes that can simultaneously created for - * this instance. - * - * @param program The program to run - * @param args Arguments to pass to the program - * @param env Any extra environment variables to set - * @return The result of the run. 
- */ - public ExecBean run(String program, List args, - Map env) - throws NotAuthorizedException, BusyException, ExecuteException, IOException { - boolean aquired = false; - try { - aquired = avail.tryAcquire(); - if (aquired) { - return runUnlimited(program, args, env); - } else { - throw new BusyException(); - } - } finally { - if (aquired) { - avail.release(); - } - } - } - - /** - * Run the program synchronously as the given user. Warning: - * CommandLine will trim the argument strings. - * - * @param program The program to run. - * @param args Arguments to pass to the program - * @param env Any extra environment variables to set - * @return The result of the run. - */ - public ExecBean runUnlimited(String program, List args, - Map env) - throws NotAuthorizedException, ExecuteException, IOException { - try { - return auxRun(program, args, env); - } catch (IOException e) { - File cwd = new java.io.File("."); - if (cwd.canRead() && cwd.canWrite()) - throw e; - else - throw new IOException("Invalid permissions on Templeton directory: " - + cwd.getCanonicalPath()); - } - } - - private ExecBean auxRun(String program, List args, Map env) - throws NotAuthorizedException, ExecuteException, IOException { - DefaultExecutor executor = new DefaultExecutor(); - executor.setExitValues(null); - - // Setup stdout and stderr - int nbytes = appConf.getInt(AppConfig.EXEC_MAX_BYTES_NAME, -1); - ByteArrayOutputStream outStream = new MaxByteArrayOutputStream(nbytes); - ByteArrayOutputStream errStream = new MaxByteArrayOutputStream(nbytes); - executor.setStreamHandler(new PumpStreamHandler(outStream, errStream)); - - // Only run for N milliseconds - int timeout = appConf.getInt(AppConfig.EXEC_TIMEOUT_NAME, 0); - ExecuteWatchdog watchdog = new ExecuteWatchdog(timeout); - executor.setWatchdog(watchdog); - - CommandLine cmd = makeCommandLine(program, args); - - LOG.info("Running: " + cmd); - ExecBean res = new ExecBean(); - res.exitcode = executor.execute(cmd, execEnv(env)); - String 
enc = appConf.get(AppConfig.EXEC_ENCODING_NAME); - res.stdout = outStream.toString(enc); - res.stderr = errStream.toString(enc); - - return res; - } - - private CommandLine makeCommandLine(String program, - List args) - throws NotAuthorizedException, IOException { - String path = validateProgram(program); - CommandLine cmd = new CommandLine(path); - if (args != null) - for (String arg : args) - cmd.addArgument(arg, false); - - return cmd; - } - - /** - * Build the environment used for all exec calls. - * - * @return The environment variables. - */ - public Map execEnv(Map env) { - HashMap res = new HashMap(); - - for (String key : appConf.getStrings(AppConfig.EXEC_ENVS_NAME)) { - String val = System.getenv(key); - if (val != null) { - res.put(key, val); - } - } - if (env != null) - res.putAll(env); - for (Map.Entry envs : res.entrySet()) { - LOG.info("Env " + envs.getKey() + "=" + envs.getValue()); - } - return res; - } - - /** - * Given a program name, lookup the fully qualified path. Throws - * an exception if the program is missing or not authorized. - * - * @param path The path of the program. - * @return The path of the validated program. - */ - public String validateProgram(String path) - throws NotAuthorizedException, IOException { - File f = new File(path); - if (f.canExecute()) { - return f.getCanonicalPath(); - } else { - throw new NotAuthorizedException("Unable to access program: " + path); - } - } -} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/GroupPermissionsDesc.java hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/GroupPermissionsDesc.java deleted file mode 100644 index e813d65..0000000 --- hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/GroupPermissionsDesc.java +++ /dev/null @@ -1,55 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.templeton; - -/** - * The base create permissions for ddl objects. - */ -public abstract class GroupPermissionsDesc { - public String group; - public String permissions; - - public GroupPermissionsDesc() {} - - protected static boolean xequals(Object a, Object b) { - if (a == null) { - if (b == null) - return true; - else - return false; - } - - return a.equals(b); - } - - protected static boolean xequals(boolean a, boolean b) { return a == b; } - protected static boolean xequals(int a, int b) { return a == b; } - protected static boolean xequals(char a, char b) { return a == b; } - - public boolean equals(Object o) { - if (this == o) - return true; - if (! 
(o instanceof GroupPermissionsDesc)) - return false; - GroupPermissionsDesc that = (GroupPermissionsDesc) o; - return xequals(this.group, that.group) - && xequals(this.permissions, that.permissions) - ; - } -} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/HcatDelegator.java hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/HcatDelegator.java deleted file mode 100644 index a364992..0000000 --- hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/HcatDelegator.java +++ /dev/null @@ -1,853 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.hcatalog.templeton; - -import java.io.IOException; -import java.net.URI; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import javax.ws.rs.core.Response; - -import org.apache.commons.exec.ExecuteException; -import org.apache.commons.lang.StringUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.ql.ErrorMsg; -import org.apache.hcatalog.templeton.tool.TempletonUtils; -import org.eclipse.jetty.http.HttpStatus; - - -/** - * Run hcat on the local server using the ExecService. This is - * the backend of the ddl web service. - */ -public class HcatDelegator extends LauncherDelegator { - private static final Log LOG = LogFactory.getLog(HcatDelegator.class); - private ExecService execService; - - public HcatDelegator(AppConfig appConf, ExecService execService) { - super(appConf); - this.execService = execService; - } - - /** - * Run the local hcat executable. - */ - public ExecBean run(String user, String exec, boolean format, - String group, String permissions) - throws NotAuthorizedException, BusyException, ExecuteException, IOException { - SecureProxySupport proxy = new SecureProxySupport(); - try { - List args = makeArgs(exec, format, group, permissions); - proxy.open(user, appConf); - - // Setup the hadoop vars to specify the user. 
- String cp = makeOverrideClasspath(appConf); - Map env = TempletonUtils.hadoopUserEnv(user, cp); - proxy.addEnv(env); - proxy.addArgs(args); - return execService.run(appConf.clusterHcat(), args, env); - } catch (InterruptedException e) { - throw new IOException(e); - } finally { - if (proxy != null) - proxy.close(); - } - } - - private List makeArgs(String exec, boolean format, - String group, String permissions) { - ArrayList args = new ArrayList(); - args.add("-e"); - args.add(exec); - if (TempletonUtils.isset(group)) { - args.add("-g"); - args.add(group); - } - if (TempletonUtils.isset(permissions)) { - args.add("-p"); - args.add(permissions); - } - if (format) { - args.add("-D"); - args.add("hive.ddl.output.format=json"); - // Use both args to ease development. Delete this one on - // May 1. - args.add("-D"); - args.add("hive.format=json"); - } - LOG.info("Main.getAppConfigInstance().get(AppConfig.UNIT_TEST_MODE)=" + - Main.getAppConfigInstance().get(AppConfig.UNIT_TEST_MODE)); - if(System.getProperty("hive.metastore.warehouse.dir") != null) { - /*when running in unit test mode, pass this property to HCat, - which will in turn pass it to Hive to make sure that Hive - tries to write to a directory that exists.*/ - args.add("-D"); - args.add("hive.metastore.warehouse.dir=" + System.getProperty("hive.metastore.warehouse.dir")); - } - return args; - } - - /** - * Return a json description of the database. - */ - public Response descDatabase(String user, String db, boolean extended) - throws HcatException, NotAuthorizedException, BusyException, - ExecuteException, IOException { - String exec = "desc database " + db + "; "; - if (extended) - exec = "desc database extended " + db + "; "; - - try { - String res = jsonRun(user, exec); - return JsonBuilder.create(res).build(); - } catch (HcatException e) { - throw new HcatException("unable to describe database: " + db, - e.execBean, exec); - } - } - - /** - * Return a json "show databases like". 
This will return a list of - * databases. - */ - public Response listDatabases(String user, String dbPattern) - throws HcatException, NotAuthorizedException, BusyException, - ExecuteException, IOException { - String exec = String.format("show databases like '%s';", dbPattern); - try { - String res = jsonRun(user, exec); - return JsonBuilder.create(res) - .build(); - } catch (HcatException e) { - throw new HcatException("unable to show databases for: " + dbPattern, - e.execBean, exec); - } - } - - /** - * Create a database with the given name - */ - public Response createDatabase(String user, DatabaseDesc desc) - throws HcatException, NotAuthorizedException, BusyException, - ExecuteException, IOException { - String exec = "create database"; - if (desc.ifNotExists) - exec += " if not exists"; - exec += " " + desc.database; - if (TempletonUtils.isset(desc.comment)) - exec += String.format(" comment '%s'", desc.comment); - if (TempletonUtils.isset(desc.location)) - exec += String.format(" location '%s'", desc.location); - if (TempletonUtils.isset(desc.properties)) - exec += String.format(" with dbproperties (%s)", - makePropertiesStatement(desc.properties)); - exec += ";"; - - String res = jsonRun(user, exec, desc.group, desc.permissions); - return JsonBuilder.create(res) - .put("database", desc.database) - .build(); - } - - /** - * Drop the given database - */ - public Response dropDatabase(String user, String db, - boolean ifExists, String option, - String group, String permissions) - throws HcatException, NotAuthorizedException, BusyException, - ExecuteException, IOException { - String exec = "drop database"; - if (ifExists) - exec += " if exists"; - exec += " " + db; - if (TempletonUtils.isset(option)) - exec += " " + option; - exec += ";"; - - String res = jsonRun(user, exec, group, permissions); - return JsonBuilder.create(res) - .put("database", db) - .build(); - } - - /** - * Create a table. 
- */ - public Response createTable(String user, String db, TableDesc desc) - throws HcatException, NotAuthorizedException, BusyException, - ExecuteException, IOException { - String exec = makeCreateTable(db, desc); - - try { - String res = jsonRun(user, exec, desc.group, desc.permissions, true); - - return JsonBuilder.create(res) - .put("database", db) - .put("table", desc.table) - .build(); - } catch (final HcatException e) { - throw new HcatException("unable to create table: " + desc.table, - e.execBean, exec); - } - } - - /** - * Create a table like another. - */ - public Response createTableLike(String user, String db, TableLikeDesc desc) - throws HcatException, NotAuthorizedException, BusyException, - ExecuteException, IOException { - String exec = String.format("use %s; create", db); - - if (desc.external) - exec += " external"; - exec += String.format(" table %s like %s", desc.newTable, desc.existingTable); - if (TempletonUtils.isset(desc.location)) - exec += String.format(" location '%s'", desc.location); - exec += ";"; - - try { - String res = jsonRun(user, exec, desc.group, desc.permissions, true); - - return JsonBuilder.create(res) - .put("database", db) - .put("table", desc.newTable) - .build(); - } catch (final HcatException e) { - throw new HcatException("unable to create table: " + desc.newTable, - e.execBean, exec); - } - } - - /** - * Return a json description of the table. 
- */ - public Response descTable(String user, String db, String table, boolean extended) - throws HcatException, NotAuthorizedException, BusyException, - ExecuteException, IOException { - String exec = "use " + db + "; "; - if (extended) - exec += "desc extended " + table + "; "; - else - exec += "desc " + table + "; "; - try { - String res = jsonRun(user, exec); - return JsonBuilder.create(res) - .put("database", db) - .put("table", table) - .build(); - } catch (HcatException e) { - throw new HcatException("unable to describe database: " + db, - e.execBean, exec); - } - } - - /** - * Return a json "show table like". This will return a list of - * tables. - */ - public Response listTables(String user, String db, String tablePattern) - throws HcatException, NotAuthorizedException, BusyException, - ExecuteException, IOException { - String exec = String.format("use %s; show tables like '%s';", - db, tablePattern); - try { - String res = jsonRun(user, exec); - return JsonBuilder.create(res) - .put("database", db) - .build(); - } catch (HcatException e) { - throw new HcatException("unable to show tables for: " + tablePattern, - e.execBean, exec); - } - } - - /** - * Return a json "show table extended like". This will return - * only the first single table. 
- */ - public Response descExtendedTable(String user, String db, String table) - throws HcatException, NotAuthorizedException, BusyException, - ExecuteException, IOException { - String exec = String.format("use %s; show table extended like %s;", - db, table); - try { - String res = jsonRun(user, exec); - JsonBuilder jb = JsonBuilder.create(singleTable(res, table)) - .remove("tableName") - .put("database", db) - .put("table", table); - - // If we can get them from HDFS, add group and permission - String loc = (String) jb.getMap().get("location"); - if (loc != null && loc.startsWith("hdfs://")) { - try { - FileSystem fs = FileSystem.get(appConf); - FileStatus status = fs.getFileStatus(new Path(new URI(loc))); - jb.put("group", status.getGroup()); - jb.put("permission", status.getPermission().toString()); - } catch (Exception e) { - LOG.warn(e.getMessage() + " Couldn't get permissions for " + loc); - } - } - return jb.build(); - } catch (HcatException e) { - throw new HcatException("unable to show table: " + table, e.execBean, exec); - } - } - - // Format a list of Columns for a create statement - private String makeCols(List cols) { - ArrayList res = new ArrayList(); - for (ColumnDesc col : cols) - res.add(makeOneCol(col)); - return StringUtils.join(res, ", "); - } - - // Format a Column for a create statement - private String makeOneCol(ColumnDesc col) { - String res = String.format("%s %s", col.name, col.type); - if (TempletonUtils.isset(col.comment)) - res += String.format(" comment '%s'", col.comment); - return res; - } - - // Make a create table statement - private String makeCreateTable(String db, TableDesc desc) { - String exec = String.format("use %s; create", db); - - if (desc.external) - exec += " external"; - exec += " table"; - if (desc.ifNotExists) - exec += " if not exists"; - exec += " " + desc.table; - - if (TempletonUtils.isset(desc.columns)) - exec += String.format("(%s)", makeCols(desc.columns)); - if (TempletonUtils.isset(desc.comment)) - exec += 
String.format(" comment '%s'", desc.comment); - if (TempletonUtils.isset(desc.partitionedBy)) - exec += String.format(" partitioned by (%s)", makeCols(desc.partitionedBy)); - if (desc.clusteredBy != null) - exec += String.format(" clustered by %s", makeClusteredBy(desc.clusteredBy)); - if (desc.format != null) - exec += " " + makeStorageFormat(desc.format); - if (TempletonUtils.isset(desc.location)) - exec += String.format(" location '%s'", desc.location); - if (TempletonUtils.isset(desc.tableProperties)) - exec += String.format(" tblproperties (%s)", - makePropertiesStatement(desc.tableProperties)); - exec += ";"; - - return exec; - } - - // Format a clustered by statement - private String makeClusteredBy(TableDesc.ClusteredByDesc desc) { - String res = String.format("(%s)", StringUtils.join(desc.columnNames, ", ")); - if (TempletonUtils.isset(desc.sortedBy)) - res += String.format(" sorted by (%s)", makeClusterSortList(desc.sortedBy)); - res += String.format(" into %s buckets", desc.numberOfBuckets); - - return res; - } - - // Format a sorted by statement - private String makeClusterSortList(List descs) { - ArrayList res = new ArrayList(); - for (TableDesc.ClusterSortOrderDesc desc : descs) - res.add(makeOneClusterSort(desc)); - return StringUtils.join(res, ", "); - } - - // Format a single cluster sort statement - private String makeOneClusterSort(TableDesc.ClusterSortOrderDesc desc) { - return String.format("%s %s", desc.columnName, desc.order.toString()); - } - - // Format the storage format statements - private String makeStorageFormat(TableDesc.StorageFormatDesc desc) { - String res = ""; - - if (desc.rowFormat != null) - res += makeRowFormat(desc.rowFormat); - if (TempletonUtils.isset(desc.storedAs)) - res += String.format(" stored as %s", desc.storedAs); - if (desc.storedBy != null) - res += " " + makeStoredBy(desc.storedBy); - - return res; - } - - // Format the row format statement - private String makeRowFormat(TableDesc.RowFormatDesc desc) { - String 
res = - makeTermBy(desc.fieldsTerminatedBy, "fields") - + makeTermBy(desc.collectionItemsTerminatedBy, "collection items") - + makeTermBy(desc.mapKeysTerminatedBy, "map keys") - + makeTermBy(desc.linesTerminatedBy, "lines"); - - if (TempletonUtils.isset(res)) - return "row format delimited" + res; - else if (desc.serde != null) - return makeSerdeFormat(desc.serde); - else - return ""; - } - - // A row format terminated by clause - private String makeTermBy(String sep, String fieldName) { - - if (TempletonUtils.isset(sep)) - return String.format(" %s terminated by '%s'", fieldName, sep); - else - return ""; - } - - // Format the serde statement - private String makeSerdeFormat(TableDesc.SerdeDesc desc) { - String res = "row format serde " + desc.name; - if (TempletonUtils.isset(desc.properties)) - res += String.format(" with serdeproperties (%s)", - makePropertiesStatement(desc.properties)); - return res; - } - - // Format the properties statement - private String makePropertiesStatement(Map properties) { - ArrayList res = new ArrayList(); - for (Map.Entry e : properties.entrySet()) - res.add(String.format("'%s'='%s'", e.getKey(), e.getValue())); - return StringUtils.join(res, ", "); - } - - // Format the stored by statement - private String makeStoredBy(TableDesc.StoredByDesc desc) { - String res = String.format("stored by '%s'", desc.className); - if (TempletonUtils.isset(desc.properties)) - res += String.format(" with serdeproperties (%s)", - makePropertiesStatement(desc.properties)); - return res; - } - - // Pull out the first table from the "show extended" json. 
- private String singleTable(String json, String table) - throws IOException { - Map obj = JsonBuilder.jsonToMap(json); - if (JsonBuilder.isError(obj)) - return json; - - List tables = (List) obj.get("tables"); - if (TempletonUtils.isset(tables)) - return JsonBuilder.mapToJson(tables.get(0)); - else { - return JsonBuilder - .createError(ErrorMsg.INVALID_TABLE.format(table), - ErrorMsg.INVALID_TABLE.getErrorCode()). - buildJson(); - } - } - - /** - * Drop a table. - */ - public Response dropTable(String user, String db, - String table, boolean ifExists, - String group, String permissions) - throws HcatException, NotAuthorizedException, BusyException, - ExecuteException, IOException { - String exec = String.format("use %s; drop table", db); - if (ifExists) - exec += " if exists"; - exec += String.format(" %s;", table); - - try { - String res = jsonRun(user, exec, group, permissions, true); - return JsonBuilder.create(res) - .put("database", db) - .put("table", table) - .build(); - } catch (HcatException e) { - throw new HcatException("unable to drop table: " + table, e.execBean, exec); - } - } - - /** - * Rename a table. - */ - public Response renameTable(String user, String db, - String oldTable, String newTable, - String group, String permissions) - throws HcatException, NotAuthorizedException, BusyException, - ExecuteException, IOException { - String exec = String.format("use %s; alter table %s rename to %s;", - db, oldTable, newTable); - try { - String res = jsonRun(user, exec, group, permissions, true); - return JsonBuilder.create(res) - .put("database", db) - .put("table", newTable) - .build(); - } catch (HcatException e) { - throw new HcatException("unable to rename table: " + oldTable, - e.execBean, exec); - } - } - - /** - * Describe one table property. 
- */ - public Response descTableProperty(String user, String db, - String table, String property) - throws HcatException, NotAuthorizedException, BusyException, - ExecuteException, IOException { - Response res = descTable(user, db, table, true); - if (res.getStatus() != HttpStatus.OK_200) - return res; - Map props = tableProperties(res.getEntity()); - Map found = null; - if (props != null) { - String value = (String) props.get(property); - if (value != null) { - found = new HashMap(); - found.put(property, value); - } - } - - return JsonBuilder.create() - .put("database", db) - .put("table", table) - .put("property", found) - .build(); - } - - /** - * List the table properties. - */ - public Response listTableProperties(String user, String db, String table) - throws HcatException, NotAuthorizedException, BusyException, - ExecuteException, IOException { - Response res = descTable(user, db, table, true); - if (res.getStatus() != HttpStatus.OK_200) - return res; - Map props = tableProperties(res.getEntity()); - return JsonBuilder.create() - .put("database", db) - .put("table", table) - .put("properties", props) - .build(); - } - - /** - * Add one table property. 
- */ - public Response addOneTableProperty(String user, String db, String table, - TablePropertyDesc desc) - throws HcatException, NotAuthorizedException, BusyException, - ExecuteException, IOException { - String exec - = String.format("use %s; alter table %s set tblproperties ('%s'='%s');", - db, table, desc.name, desc.value); - try { - String res = jsonRun(user, exec, desc.group, desc.permissions, true); - return JsonBuilder.create(res) - .put("database", db) - .put("table", table) - .put("property", desc.name) - .build(); - } catch (HcatException e) { - throw new HcatException("unable to add table property: " + table, - e.execBean, exec); - } - } - - private Map tableProperties(Object extendedTable) { - if (!(extendedTable instanceof Map)) - return null; - Map m = (Map) extendedTable; - Map tableInfo = (Map) m.get("tableInfo"); - if (tableInfo == null) - return null; - - return (Map) tableInfo.get("parameters"); - } - - /** - * Return a json description of the partitions. - */ - public Response listPartitions(String user, String db, String table) - throws HcatException, NotAuthorizedException, BusyException, - ExecuteException, IOException { - String exec = "use " + db + "; "; - exec += "show partitions " + table + "; "; - try { - String res = jsonRun(user, exec); - return JsonBuilder.create(res) - .put("database", db) - .put("table", table) - .build(); - } catch (HcatException e) { - throw new HcatException("unable to show partitions for table: " + table, - e.execBean, exec); - } - } - - /** - * Return a json description of one partition. 
- */ - public Response descOnePartition(String user, String db, String table, - String partition) - throws HcatException, NotAuthorizedException, BusyException, - ExecuteException, IOException { - String exec = "use " + db + "; "; - exec += "show table extended like " + table - + " partition (" + partition + "); "; - try { - String res = jsonRun(user, exec); - return JsonBuilder.create(singleTable(res, table)) - .remove("tableName") - .put("database", db) - .put("table", table) - .put("partition", partition) - .build(); - } catch (HcatException e) { - if (e.execBean.stderr.contains("SemanticException") && - e.execBean.stderr.contains("Partition not found")) { - String emsg = "Partition " + partition + " for table " - + table + " does not exist" + db + "." + table + " does not exist"; - return JsonBuilder.create() - .put("error", emsg) - //this error should really be produced by Hive (DDLTask) - .put("errorCode", ErrorMsg.INVALID_PARTITION.getErrorCode()) - .put("database", db) - .put("table", table) - .put("partition", partition) - .build(); - } - - throw new HcatException("unable to show partition: " - + table + " " + partition, - e.execBean, - exec); - } - } - - /** - * Add one partition. - */ - public Response addOnePartition(String user, String db, String table, - PartitionDesc desc) - throws HcatException, NotAuthorizedException, BusyException, - ExecuteException, IOException { - String exec = String.format("use %s; alter table %s add", db, table); - if (desc.ifNotExists) - exec += " if not exists"; - exec += String.format(" partition (%s)", desc.partition); - if (TempletonUtils.isset(desc.location)) - exec += String.format(" location '%s'", desc.location); - exec += ";"; - try { - String res = jsonRun(user, exec, desc.group, desc.permissions, true); - if (res.indexOf("AlreadyExistsException") > -1) { - return JsonBuilder.create(). 
- put("error", "Partition already exists") - //This error code should really be produced by Hive - .put("errorCode", ErrorMsg.PARTITION_EXISTS.getErrorCode()) - .put("database", db) - .put("table", table) - .put("partition", desc.partition).build(); - } - return JsonBuilder.create(res) - .put("database", db) - .put("table", table) - .put("partition", desc.partition) - .build(); - } catch (HcatException e) { - throw new HcatException("unable to add partition: " + desc, - e.execBean, exec); - } - } - - /** - * Drop a partition. - */ - public Response dropPartition(String user, String db, - String table, String partition, boolean ifExists, - String group, String permissions) - throws HcatException, NotAuthorizedException, BusyException, - ExecuteException, IOException { - String exec = String.format("use %s; alter table %s drop", db, table); - if (ifExists) - exec += " if exists"; - exec += String.format(" partition (%s);", partition); - - try { - String res = jsonRun(user, exec, group, permissions, true); - return JsonBuilder.create(res) - .put("database", db) - .put("table", table) - .put("partition", partition) - .build(); - } catch (HcatException e) { - throw new HcatException("unable to drop partition: " + partition, - e.execBean, exec); - } - } - - /** - * Return a json description of the columns. Same as - * describeTable. - */ - public Response listColumns(String user, String db, String table) - throws HcatException, NotAuthorizedException, BusyException, - ExecuteException, IOException { - try { - return descTable(user, db, table, false); - } catch (HcatException e) { - throw new HcatException("unable to show columns for table: " + table, - e.execBean, e.statement); - } - } - - /** - * Return a json description of one column. 
- */ - public Response descOneColumn(String user, String db, String table, String column) - throws SimpleWebException, NotAuthorizedException, BusyException, - ExecuteException, IOException { - Response res = listColumns(user, db, table); - if (res.getStatus() != HttpStatus.OK_200) - return res; - - Object o = res.getEntity(); - final Map fields = (o != null && (o instanceof Map)) ? (Map) o : null; - if (fields == null) - throw new SimpleWebException(HttpStatus.INTERNAL_SERVER_ERROR_500, "Internal error, unable to find column " - + column); - - - List cols = (List) fields.get("columns"); - Map found = null; - if (cols != null) { - for (Map col : cols) { - if (column.equals(col.get("name"))) { - found = col; - break; - } - } - } - if (found == null) - throw new SimpleWebException(HttpStatus.INTERNAL_SERVER_ERROR_500, "unable to find column " + column, - new HashMap() { - { - put("description", fields); - } - }); - fields.remove("columns"); - fields.put("column", found); - return Response.fromResponse(res).entity(fields).build(); - } - - /** - * Add one column. 
- */ - public Response addOneColumn(String user, String db, String table, - ColumnDesc desc) - throws HcatException, NotAuthorizedException, BusyException, - ExecuteException, IOException { - String exec = String.format("use %s; alter table %s add columns (%s %s", - db, table, desc.name, desc.type); - if (TempletonUtils.isset(desc.comment)) - exec += String.format(" comment '%s'", desc.comment); - exec += ");"; - try { - String res = jsonRun(user, exec, desc.group, desc.permissions, true); - return JsonBuilder.create(res) - .put("database", db) - .put("table", table) - .put("column", desc.name) - .build(); - } catch (HcatException e) { - throw new HcatException("unable to add column: " + desc, - e.execBean, exec); - } - } - - // Check that the hcat result is valid and or has a valid json - // error - private boolean isValid(ExecBean eb, boolean requireEmptyOutput) { - if (eb == null) - return false; - - try { - Map m = JsonBuilder.jsonToMap(eb.stdout); - if (m.containsKey("error")) // This is a valid error message. - return true; - } catch (IOException e) { - return false; - } - - if (eb.exitcode != 0) - return false; - - if (requireEmptyOutput) - if (TempletonUtils.isset(eb.stdout)) - return false; - - return true; - } - - // Run an hcat expression and return just the json outout. - private String jsonRun(String user, String exec, - String group, String permissions, - boolean requireEmptyOutput) - throws HcatException, NotAuthorizedException, BusyException, - ExecuteException, IOException { - ExecBean res = run(user, exec, true, group, permissions); - - if (!isValid(res, requireEmptyOutput)) - throw new HcatException("Failure calling hcat: " + exec, res, exec); - - return res.stdout; - } - - // Run an hcat expression and return just the json outout. No - // permissions set. 
- private String jsonRun(String user, String exec) - throws HcatException, NotAuthorizedException, BusyException, - ExecuteException, IOException { - return jsonRun(user, exec, null, null); - } - - // Run an hcat expression and return just the json outout. - private String jsonRun(String user, String exec, - String group, String permissions) - throws HcatException, NotAuthorizedException, BusyException, - ExecuteException, IOException { - return jsonRun(user, exec, group, permissions, false); - } -} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/HcatException.java hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/HcatException.java deleted file mode 100644 index dae71f7..0000000 --- hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/HcatException.java +++ /dev/null @@ -1,42 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.templeton; - -import org.eclipse.jetty.http.HttpStatus; - -import java.util.HashMap; - -/** - * Unable to run hcat on the job. 
- */ -public class HcatException extends SimpleWebException { - public ExecBean execBean; - public String statement; - - public HcatException(String msg, final ExecBean bean, final String statement) { - super(HttpStatus.INTERNAL_SERVER_ERROR_500, msg, new HashMap() { - { - put("exec", bean); - put("statement", statement); - } - }); - execBean = bean; - this.statement = statement; - } -} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/HiveDelegator.java hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/HiveDelegator.java deleted file mode 100644 index 3c207cb..0000000 --- hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/HiveDelegator.java +++ /dev/null @@ -1,116 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.templeton; - -import java.io.FileNotFoundException; -import java.io.IOException; -import java.net.URISyntaxException; -import java.util.ArrayList; -import java.util.List; - -import org.apache.commons.exec.ExecuteException; -import org.apache.hcatalog.templeton.tool.TempletonControllerJob; -import org.apache.hcatalog.templeton.tool.TempletonUtils; - -/** - * Submit a Hive job. 
- * - * This is the backend of the hive web service. - */ -public class HiveDelegator extends LauncherDelegator { - - public HiveDelegator(AppConfig appConf) { - super(appConf); - } - - public EnqueueBean run(String user, - String execute, String srcFile, List defines, - String statusdir, String callback, String completedUrl) - throws NotAuthorizedException, BadParam, BusyException, QueueException, - ExecuteException, IOException, InterruptedException - { - runAs = user; - List args = makeArgs(execute, srcFile, defines, statusdir, - completedUrl); - - return enqueueController(user, callback, args); - } - - private List makeArgs(String execute, String srcFile, - List defines, String statusdir, String completedUrl) - throws BadParam, IOException, InterruptedException - { - ArrayList args = new ArrayList(); - try { - args.addAll(makeBasicArgs(execute, srcFile, statusdir, completedUrl)); - args.add("--"); - args.add(appConf.hivePath()); - - args.add("--service"); - args.add("cli"); - - //the token file location as initial hiveconf arg - args.add("--hiveconf"); - args.add(TempletonControllerJob.TOKEN_FILE_ARG_PLACEHOLDER); - - for (String prop : appConf.getStrings(AppConfig.HIVE_PROPS_NAME)) { - args.add("--hiveconf"); - args.add(prop); - } - for (String prop : defines) { - args.add("--hiveconf"); - args.add(prop); - } - if (TempletonUtils.isset(execute)) { - args.add("-e"); - args.add(execute); - } else if (TempletonUtils.isset(srcFile)) { - args.add("-f"); - args.add(TempletonUtils.hadoopFsPath(srcFile, appConf, runAs) - .getName()); - } - } catch (FileNotFoundException e) { - throw new BadParam(e.getMessage()); - } catch (URISyntaxException e) { - throw new BadParam(e.getMessage()); - } - - return args; - } - - private List makeBasicArgs(String execute, String srcFile, - String statusdir, String completedUrl) - throws URISyntaxException, FileNotFoundException, IOException, - InterruptedException - { - ArrayList args = new ArrayList(); - - ArrayList allFiles = new 
ArrayList(); - if (TempletonUtils.isset(srcFile)) - allFiles.add(TempletonUtils.hadoopFsFilename(srcFile, appConf, - runAs)); - - args.addAll(makeLauncherArgs(appConf, statusdir, completedUrl, allFiles)); - - args.add("-archives"); - args.add(appConf.hiveArchive()); - - return args; - } -} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/JarDelegator.java hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/JarDelegator.java deleted file mode 100644 index a2dc23e..0000000 --- hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/JarDelegator.java +++ /dev/null @@ -1,98 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.templeton; - -import java.io.FileNotFoundException; -import java.io.IOException; -import java.net.URISyntaxException; -import java.util.ArrayList; -import java.util.List; - -import org.apache.commons.exec.ExecuteException; -import org.apache.hcatalog.templeton.tool.TempletonControllerJob; -import org.apache.hcatalog.templeton.tool.TempletonUtils; - -/** - * Submit a job to the MapReduce queue. - * - * This is the backend of the mapreduce/jar web service. 
- */ -public class JarDelegator extends LauncherDelegator { - public JarDelegator(AppConfig appConf) { - super(appConf); - } - - public EnqueueBean run(String user, String jar, String mainClass, - String libjars, String files, - List jarArgs, List defines, - String statusdir, String callback, String completedUrl) - throws NotAuthorizedException, BadParam, BusyException, QueueException, - ExecuteException, IOException, InterruptedException { - runAs = user; - List args = makeArgs(jar, mainClass, - libjars, files, jarArgs, defines, - statusdir, completedUrl); - - return enqueueController(user, callback, args); - } - - private List makeArgs(String jar, String mainClass, - String libjars, String files, - List jarArgs, List defines, - String statusdir, String completedUrl) - throws BadParam, IOException, InterruptedException { - ArrayList args = new ArrayList(); - try { - ArrayList allFiles = new ArrayList(); - allFiles.add(TempletonUtils.hadoopFsFilename(jar, appConf, runAs)); - - args.addAll(makeLauncherArgs(appConf, statusdir, - completedUrl, allFiles)); - args.add("--"); - args.add(appConf.clusterHadoop()); - args.add("jar"); - args.add(TempletonUtils.hadoopFsPath(jar, appConf, runAs).getName()); - if (TempletonUtils.isset(mainClass)) - args.add(mainClass); - if (TempletonUtils.isset(libjars)) { - args.add("-libjars"); - args.add(TempletonUtils.hadoopFsListAsString(libjars, appConf, - runAs)); - } - if (TempletonUtils.isset(files)) { - args.add("-files"); - args.add(TempletonUtils.hadoopFsListAsString(files, appConf, - runAs)); - } - //the token file location comes after mainClass, as a -Dprop=val - args.add("-D" + TempletonControllerJob.TOKEN_FILE_ARG_PLACEHOLDER); - - for (String d : defines) - args.add("-D" + d); - - args.addAll(jarArgs); - } catch (FileNotFoundException e) { - throw new BadParam(e.getMessage()); - } catch (URISyntaxException e) { - throw new BadParam(e.getMessage()); - } - - return args; - } -} diff --git 
hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/JsonBuilder.java hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/JsonBuilder.java deleted file mode 100644 index 7d01cbb..0000000 --- hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/JsonBuilder.java +++ /dev/null @@ -1,194 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.templeton; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.util.HashMap; -import java.util.Map; -import javax.ws.rs.core.MediaType; -import javax.ws.rs.core.Response; - -import org.apache.hadoop.hive.ql.ErrorMsg; -import org.apache.hcatalog.templeton.tool.TempletonUtils; -import org.codehaus.jackson.map.ObjectMapper; -import org.eclipse.jetty.http.HttpStatus; - -/** - * Helper class to build new json objects with new top level - * properties. Only add non-null entries. - */ -public class JsonBuilder { - private static final Map hiveError2HttpStatusCode = new HashMap(); - - /** - * It's expected that Hive (and thus HCat CLI) will return canonical error msgs/codes. - * Here they are mapped to appropriate HTTP Status Code. 
- */ - static { - hiveError2HttpStatusCode.put(ErrorMsg.GENERIC_ERROR.getErrorCode(), HttpStatus.INTERNAL_SERVER_ERROR_500); - hiveError2HttpStatusCode.put(ErrorMsg.DATABASE_NOT_EXISTS.getErrorCode(), HttpStatus.NOT_FOUND_404); - hiveError2HttpStatusCode.put(ErrorMsg.INVALID_TABLE.getErrorCode(), HttpStatus.NOT_FOUND_404); - hiveError2HttpStatusCode.put(ErrorMsg.TABLE_NOT_PARTITIONED.getErrorCode(), HttpStatus.NOT_FOUND_404); - hiveError2HttpStatusCode.put(ErrorMsg.INVALID_PARTITION.getErrorCode(), HttpStatus.NOT_FOUND_404); - - hiveError2HttpStatusCode.put(ErrorMsg.DUPLICATE_COLUMN_NAMES.getErrorCode(), HttpStatus.CONFLICT_409); - hiveError2HttpStatusCode.put(ErrorMsg.DATABSAE_ALREADY_EXISTS.getErrorCode(), HttpStatus.CONFLICT_409); - hiveError2HttpStatusCode.put(ErrorMsg.PARTITION_EXISTS.getErrorCode(), HttpStatus.CONFLICT_409); - hiveError2HttpStatusCode.put(ErrorMsg.TABLE_ALREADY_EXISTS.getErrorCode(), HttpStatus.CONFLICT_409); - } - - // The map we're building. - private Map map; - - // Parse the json map. - private JsonBuilder(String json) - throws IOException { - map = jsonToMap(json); - } - - /** - * Create a new map object from the existing json. - */ - public static JsonBuilder create(String json) - throws IOException { - return new JsonBuilder(json); - } - - /** - * Create a new map object. - */ - public static JsonBuilder create() - throws IOException { - return new JsonBuilder(null); - } - - /** - * Create a new map error object. - */ - public static JsonBuilder createError(String msg, int errorCode) - throws IOException { - return new JsonBuilder(null) - .put("error", msg) - .put("errorCode", errorCode); - } - - /** - * Add a non-null value to the map. - */ - public JsonBuilder put(String name, Object val) { - if (val != null) - map.put(name, val); - return this; - } - - /** - * Remove a value from the map. - */ - public JsonBuilder remove(String name) { - map.remove(name); - return this; - } - - /** - * Get the underlying map. 
- */ - public Map getMap() { - return map; - } - - /** - * Turn the map back to response object. - */ - public Response build() { - return buildResponse(); - } - - /** - * Turn the map back to json. - */ - public String buildJson() - throws IOException { - return mapToJson(map); - } - - /** - * Turn the map back to response object. - */ - public Response buildResponse() { - int status = HttpStatus.OK_200; // Server ok. - if (map.containsKey("error")) - status = HttpStatus.INTERNAL_SERVER_ERROR_500; // Generic http server error. - Object o = map.get("errorCode"); - if (o != null) { - if(hiveError2HttpStatusCode.containsKey(o)) { - status = hiveError2HttpStatusCode.get(o); - } - } - return buildResponse(status); - } - - /** - * Turn the map back to response object. - */ - public Response buildResponse(int status) { - return Response.status(status) - .entity(map) - .type(MediaType.APPLICATION_JSON) - .build(); - } - - /** - * Is the object non-empty? - */ - public boolean isset() { - return TempletonUtils.isset(map); - } - - /** - * Check if this is an error doc. - */ - public static boolean isError(Map obj) { - return (obj != null) && obj.containsKey("error"); - } - - /** - * Convert a json string to a Map. - */ - public static Map jsonToMap(String json) - throws IOException { - if (!TempletonUtils.isset(json)) - return new HashMap(); - else { - ObjectMapper mapper = new ObjectMapper(); - return mapper.readValue(json, Map.class); - } - } - - /** - * Convert a map to a json string. 
- */ - public static String mapToJson(Object obj) - throws IOException { - ObjectMapper mapper = new ObjectMapper(); - ByteArrayOutputStream out = new ByteArrayOutputStream(); - mapper.writeValue(out, obj); - return out.toString(); - } -} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/LauncherDelegator.java hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/LauncherDelegator.java deleted file mode 100644 index 20c7a1f..0000000 --- hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/LauncherDelegator.java +++ /dev/null @@ -1,198 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.hcatalog.templeton; - -import java.io.IOException; -import java.security.PrivilegedExceptionAction; -import java.util.ArrayList; -import java.util.List; - -import org.apache.commons.exec.ExecuteException; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hadoop.util.StringUtils; -import org.apache.hadoop.util.ToolRunner; -import org.apache.hcatalog.templeton.tool.JobState; -import org.apache.hcatalog.templeton.tool.TempletonControllerJob; -import org.apache.hcatalog.templeton.tool.TempletonStorage; -import org.apache.hcatalog.templeton.tool.TempletonUtils; -import org.apache.hcatalog.templeton.tool.ZooKeeperStorage; - -/** - * The helper class for all the Templeton delegator classes that - * launch child jobs. - */ -public class LauncherDelegator extends TempletonDelegator { - private static final Log LOG = LogFactory.getLog(LauncherDelegator.class); - protected String runAs = null; - - public LauncherDelegator(AppConfig appConf) { - super(appConf); - } - - public void registerJob(String id, String user, String callback) - throws IOException { - JobState state = null; - try { - state = new JobState(id, Main.getAppConfigInstance()); - state.setUser(user); - state.setCallback(callback); - } finally { - if (state != null) - state.close(); - } - } - - /** - * Enqueue the TempletonControllerJob directly calling doAs. 
- */ - public EnqueueBean enqueueController(String user, String callback, - List args) - throws NotAuthorizedException, BusyException, ExecuteException, - IOException, QueueException { - try { - UserGroupInformation ugi = UgiFactory.getUgi(user); - - final long startTime = System.nanoTime(); - - String id = queueAsUser(ugi, args); - - long elapsed = ((System.nanoTime() - startTime) / ((int) 1e6)); - LOG.debug("queued job " + id + " in " + elapsed + " ms"); - - if (id == null) - throw new QueueException("Unable to get job id"); - - registerJob(id, user, callback); - - return new EnqueueBean(id); - } catch (InterruptedException e) { - throw new QueueException("Unable to launch job " + e); - } - } - - private String queueAsUser(UserGroupInformation ugi, final List args) - throws IOException, InterruptedException { - String id = ugi.doAs(new PrivilegedExceptionAction() { - public String run() throws Exception { - String[] array = new String[args.size()]; - TempletonControllerJob ctrl = new TempletonControllerJob(); - ToolRunner.run(ctrl, args.toArray(array)); - return ctrl.getSubmittedId(); - } - }); - - return id; - } - - public List makeLauncherArgs(AppConfig appConf, String statusdir, - String completedUrl, - List copyFiles) { - ArrayList args = new ArrayList(); - - args.add("-libjars"); - args.add(appConf.libJars()); - addCacheFiles(args, appConf); - - // Hadoop vars - addDef(args, "user.name", runAs); - addDef(args, AppConfig.HADOOP_SPECULATIVE_NAME, "false"); - addDef(args, AppConfig.HADOOP_CHILD_JAVA_OPTS, appConf.controllerMRChildOpts()); - - // Internal vars - addDef(args, TempletonControllerJob.STATUSDIR_NAME, statusdir); - addDef(args, TempletonControllerJob.COPY_NAME, - TempletonUtils.encodeArray(copyFiles)); - addDef(args, TempletonControllerJob.OVERRIDE_CLASSPATH, - makeOverrideClasspath(appConf)); - - // Hadoop queue information - addDef(args, "mapred.job.queue.name", appConf.hadoopQueueName()); - - // Job vars - addStorageVars(args); - 
addCompletionVars(args, completedUrl); - - return args; - } - - // Storage vars - private void addStorageVars(List args) { - addDef(args, TempletonStorage.STORAGE_CLASS, - appConf.get(TempletonStorage.STORAGE_CLASS)); - addDef(args, TempletonStorage.STORAGE_ROOT, - appConf.get(TempletonStorage.STORAGE_ROOT)); - addDef(args, ZooKeeperStorage.ZK_HOSTS, - appConf.get(ZooKeeperStorage.ZK_HOSTS)); - addDef(args, ZooKeeperStorage.ZK_SESSION_TIMEOUT, - appConf.get(ZooKeeperStorage.ZK_SESSION_TIMEOUT)); - } - - // Completion notifier vars - private void addCompletionVars(List args, String completedUrl) { - addDef(args, AppConfig.HADOOP_END_RETRY_NAME, - appConf.get(AppConfig.CALLBACK_RETRY_NAME)); - addDef(args, AppConfig.HADOOP_END_INTERVAL_NAME, - appConf.get(AppConfig.CALLBACK_INTERVAL_NAME)); - addDef(args, AppConfig.HADOOP_END_URL_NAME, completedUrl); - } - - /** - * Add files to the Distributed Cache for the controller job. - */ - public static void addCacheFiles(List args, AppConfig appConf) { - String overrides = appConf.overrideJarsString(); - if (overrides != null) { - args.add("-files"); - args.add(overrides); - } - } - - /** - * Create the override classpath, which will be added to - * HADOOP_CLASSPATH at runtime by the controller job. - */ - public static String makeOverrideClasspath(AppConfig appConf) { - String[] overrides = appConf.overrideJars(); - if (overrides == null) - return null; - - ArrayList cp = new ArrayList(); - for (String fname : overrides) { - Path p = new Path(fname); - cp.add(p.getName()); - } - return StringUtils.join(":", cp); - } - - - /** - * Add a Hadoop command line definition to args if the value is - * not null. 
- */ - public static void addDef(List args, String name, String val) { - if (val != null) { - args.add("-D"); - args.add(name + "=" + val); - } - } - -} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/ListDelegator.java hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/ListDelegator.java deleted file mode 100644 index 5fbbde8..0000000 --- hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/ListDelegator.java +++ /dev/null @@ -1,75 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.templeton; - -import java.io.IOException; -import java.util.List; -import java.util.ArrayList; - -import org.apache.hadoop.hive.shims.HadoopShims.WebHCatJTShim; -import org.apache.hadoop.hive.shims.ShimLoader; -import org.apache.hadoop.mapred.JobStatus; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hcatalog.templeton.tool.JobState; - -/** - * List jobs owned by a user. 
- */ -public class ListDelegator extends TempletonDelegator { - public ListDelegator(AppConfig appConf) { - super(appConf); - } - - public List run(String user, boolean showall) - throws NotAuthorizedException, BadParam, IOException, InterruptedException { - - UserGroupInformation ugi = UgiFactory.getUgi(user); - WebHCatJTShim tracker = null; - try { - tracker = ShimLoader.getHadoopShims().getWebHCatShim(appConf, ugi); - - ArrayList ids = new ArrayList(); - - JobStatus[] jobs = tracker.getAllJobs(); - - if (jobs != null) { - for (JobStatus job : jobs) { - JobState state = null; - try { - String id = job.getJobID().toString(); - state = new JobState(id, Main.getAppConfigInstance()); - if (showall || user.equals(state.getUser())) - ids.add(id); - } finally { - if (state != null) { - state.close(); - } - } - } - } - - return ids; - } catch (IllegalStateException e) { - throw new BadParam(e.getMessage()); - } finally { - if (tracker != null) - tracker.close(); - } - } -} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/Main.java hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/Main.java deleted file mode 100644 index 3761228..0000000 --- hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/Main.java +++ /dev/null @@ -1,252 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.templeton; - -import com.sun.jersey.api.core.PackagesResourceConfig; -import com.sun.jersey.spi.container.servlet.ServletContainer; - -import java.io.File; -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hdfs.web.AuthFilter; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hadoop.util.GenericOptionsParser; -import org.eclipse.jetty.rewrite.handler.RedirectPatternRule; -import org.eclipse.jetty.rewrite.handler.RewriteHandler; -import org.eclipse.jetty.server.Handler; -import org.eclipse.jetty.server.Server; -import org.eclipse.jetty.server.handler.HandlerList; -import org.eclipse.jetty.servlet.FilterHolder; -import org.eclipse.jetty.servlet.FilterMapping; -import org.eclipse.jetty.servlet.ServletContextHandler; -import org.eclipse.jetty.servlet.ServletHolder; -import org.slf4j.bridge.SLF4JBridgeHandler; - -/** - * The main executable that starts up and runs the Server. - */ -public class Main { - public static final String SERVLET_PATH = "templeton"; - private static final Log LOG = LogFactory.getLog(Main.class); - - public static final int DEFAULT_PORT = 8080; - private Server server; - - private static volatile AppConfig conf; - - /** - * Retrieve the config singleton. 
- */ - public static synchronized AppConfig getAppConfigInstance() { - if (conf == null) - LOG.error("Bug: configuration not yet loaded"); - return conf; - } - - Main(String[] args) { - init(args); - } - - public void init(String[] args) { - initLogger(); - conf = loadConfig(args); - conf.startCleanup(); - LOG.debug("Loaded conf " + conf); - } - - // Jersey uses java.util.logging - bridge to slf4 - private void initLogger() { - java.util.logging.Logger rootLogger - = java.util.logging.LogManager.getLogManager().getLogger(""); - for (java.util.logging.Handler h : rootLogger.getHandlers()) - rootLogger.removeHandler(h); - - SLF4JBridgeHandler.install(); - } - - public AppConfig loadConfig(String[] args) { - AppConfig cf = new AppConfig(); - try { - GenericOptionsParser parser = new GenericOptionsParser(cf, args); - if (parser.getRemainingArgs().length > 0) - usage(); - } catch (IOException e) { - LOG.error("Unable to parse options: " + e); - usage(); - } - - return cf; - } - - public void usage() { - System.err.println("usage: templeton [-Dtempleton.port=N] [-D...]"); - System.exit(1); - } - - public void run() { - int port = conf.getInt(AppConfig.PORT, DEFAULT_PORT); - try { - checkEnv(); - runServer(port); - System.out.println("templeton: listening on port " + port); - LOG.info("Templeton listening on port " + port); - } catch (Exception e) { - System.err.println("templeton: Server failed to start: " + e.getMessage()); - LOG.fatal("Server failed to start: " + e); - System.exit(1); - } - } - void stop() { - if(server != null) { - try { - server.stop(); - } - catch(Exception ex) { - LOG.warn("Failed to stop jetty.Server", ex); - } - } - } - - - private void checkEnv() { - checkCurrentDirPermissions(); - - } - - private void checkCurrentDirPermissions() { - //org.apache.commons.exec.DefaultExecutor requires - // that current directory exists - File pwd = new File("."); - if (!pwd.exists()) { - String msg = "Server failed to start: templeton: Current working directory 
'.' does not exist!"; - System.err.println(msg); - LOG.fatal(msg); - System.exit(1); - } - } - - public Server runServer(int port) - throws Exception { - - //Authenticate using keytab - if (UserGroupInformation.isSecurityEnabled()) { - UserGroupInformation.loginUserFromKeytab(conf.kerberosPrincipal(), - conf.kerberosKeytab()); - } - - // Create the Jetty server - Server server = new Server(port); - ServletContextHandler root = new ServletContextHandler(server, "/"); - - // Add the Auth filter - FilterHolder fHolder = makeAuthFilter(); - - /* - * We add filters for each of the URIs supported by templeton. - * If we added the entire sub-structure using '/*', the mapreduce - * notification cannot give the callback to templeton in secure mode. - * This is because mapreduce does not use secure credentials for - * callbacks. So jetty would fail the request as unauthorized. - */ - root.addFilter(fHolder, "/" + SERVLET_PATH + "/v1/ddl/*", - FilterMapping.REQUEST); - root.addFilter(fHolder, "/" + SERVLET_PATH + "/v1/pig/*", - FilterMapping.REQUEST); - root.addFilter(fHolder, "/" + SERVLET_PATH + "/v1/hive/*", - FilterMapping.REQUEST); - root.addFilter(fHolder, "/" + SERVLET_PATH + "/v1/queue/*", - FilterMapping.REQUEST); - root.addFilter(fHolder, "/" + SERVLET_PATH + "/v1/mapreduce/*", - FilterMapping.REQUEST); - root.addFilter(fHolder, "/" + SERVLET_PATH + "/v1/status/*", - FilterMapping.REQUEST); - root.addFilter(fHolder, "/" + SERVLET_PATH + "/v1/version/*", - FilterMapping.REQUEST); - - // Connect Jersey - ServletHolder h = new ServletHolder(new ServletContainer(makeJerseyConfig())); - root.addServlet(h, "/" + SERVLET_PATH + "/*"); - // Add any redirects - addRedirects(server); - - // Start the server - server.start(); - this.server = server; - return server; - } - - // Configure the AuthFilter with the Kerberos params iff security - // is enabled. 
- public FilterHolder makeAuthFilter() { - FilterHolder authFilter = new FilterHolder(AuthFilter.class); - if (UserGroupInformation.isSecurityEnabled()) { - authFilter.setInitParameter("dfs.web.authentication.signature.secret", - conf.kerberosSecret()); - authFilter.setInitParameter("dfs.web.authentication.kerberos.principal", - conf.kerberosPrincipal()); - authFilter.setInitParameter("dfs.web.authentication.kerberos.keytab", - conf.kerberosKeytab()); - } - return authFilter; - } - - public PackagesResourceConfig makeJerseyConfig() { - PackagesResourceConfig rc - = new PackagesResourceConfig("org.apache.hcatalog.templeton"); - HashMap props = new HashMap(); - props.put("com.sun.jersey.api.json.POJOMappingFeature", "true"); - props.put("com.sun.jersey.config.property.WadlGeneratorConfig", - "org.apache.hcatalog.templeton.WadlConfig"); - rc.setPropertiesAndFeatures(props); - - return rc; - } - - public void addRedirects(Server server) { - RewriteHandler rewrite = new RewriteHandler(); - - RedirectPatternRule redirect = new RedirectPatternRule(); - redirect.setPattern("/templeton/v1/application.wadl"); - redirect.setLocation("/templeton/application.wadl"); - rewrite.addRule(redirect); - - HandlerList handlerlist = new HandlerList(); - ArrayList handlers = new ArrayList(); - - // Any redirect handlers need to be added first - handlers.add(rewrite); - - // Now add all the default handlers - for (Handler handler : server.getHandlers()) { - handlers.add(handler); - } - Handler[] newlist = new Handler[handlers.size()]; - handlerlist.setHandlers(handlers.toArray(newlist)); - server.setHandler(handlerlist); - } - - public static void main(String[] args) { - Main templeton = new Main(args); - templeton.run(); - } -} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/MaxByteArrayOutputStream.java hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/MaxByteArrayOutputStream.java deleted file mode 100644 index dbc659a..0000000 --- 
hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/MaxByteArrayOutputStream.java +++ /dev/null @@ -1,76 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.templeton; - -import java.io.ByteArrayOutputStream; - -/** - * An output stream that will only accept the first N bytes of data. - */ -public class MaxByteArrayOutputStream extends ByteArrayOutputStream { - /** - * The max number of bytes stored. - */ - private int maxBytes; - - /** - * The number of bytes currently stored. - */ - private int nBytes; - - /** - * Create. - */ - public MaxByteArrayOutputStream(int maxBytes) { - this.maxBytes = maxBytes; - nBytes = 0; - } - - /** - * Writes the specified byte to this byte array output stream. - * Any bytes after the first maxBytes will be ignored. - * - * @param b the byte to be written. - */ - public synchronized void write(int b) { - if (nBytes < maxBytes) { - ++nBytes; - super.write(b); - } - } - - /** - * Writes len bytes from the specified byte array - * starting at offset off to this byte array output stream. - * Any bytes after the first maxBytes will be ignored. - * - * @param b the data. - * @param off the start offset in the data. 
- * @param len the number of bytes to write. - */ - public synchronized void write(byte b[], int off, int len) { - int storable = Math.min(maxBytes - nBytes, len); - if (storable > 0) { - nBytes += storable; - super.write(b, off, storable); - } - } - - -} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/NotAuthorizedException.java hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/NotAuthorizedException.java deleted file mode 100644 index 3245b17..0000000 --- hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/NotAuthorizedException.java +++ /dev/null @@ -1,30 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.templeton; - -import org.eclipse.jetty.http.HttpStatus; - -/** - * Simple "user not found" type exception. 
- */ -public class NotAuthorizedException extends SimpleWebException { - public NotAuthorizedException(String msg) { - super(HttpStatus.UNAUTHORIZED_401, msg); - } -} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/PartitionDesc.java hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/PartitionDesc.java deleted file mode 100644 index f71c04e..0000000 --- hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/PartitionDesc.java +++ /dev/null @@ -1,38 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.templeton; - -import javax.xml.bind.annotation.XmlRootElement; - -/** - * A description of the partition to create. 
- */ -@XmlRootElement -public class PartitionDesc extends GroupPermissionsDesc { - public String partition; - public String location; - public boolean ifNotExists = false; - - public PartitionDesc() {} - - public String toString() { - return String.format("PartitionDesc(partition=%s, location=%s, ifNotExists=%s)", - partition, location, ifNotExists); - } -} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/PigDelegator.java hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/PigDelegator.java deleted file mode 100644 index b318373..0000000 --- hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/PigDelegator.java +++ /dev/null @@ -1,97 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.templeton; - -import java.io.FileNotFoundException; -import java.io.IOException; -import java.net.URISyntaxException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; - -import org.apache.commons.exec.ExecuteException; -import org.apache.hcatalog.templeton.tool.TempletonControllerJob; -import org.apache.hcatalog.templeton.tool.TempletonUtils; - -/** - * Submit a Pig job. 
- * - * This is the backend of the pig web service. - */ -public class PigDelegator extends LauncherDelegator { - public PigDelegator(AppConfig appConf) { - super(appConf); - } - - public EnqueueBean run(String user, - String execute, String srcFile, - List pigArgs, String otherFiles, - String statusdir, String callback, String completedUrl) - throws NotAuthorizedException, BadParam, BusyException, QueueException, - ExecuteException, IOException, InterruptedException { - runAs = user; - List args = makeArgs(execute, - srcFile, pigArgs, - otherFiles, statusdir, completedUrl); - - return enqueueController(user, callback, args); - } - - private List makeArgs(String execute, String srcFile, - List pigArgs, String otherFiles, - String statusdir, String completedUrl) - throws BadParam, IOException, InterruptedException { - ArrayList args = new ArrayList(); - try { - ArrayList allFiles = new ArrayList(); - if (TempletonUtils.isset(srcFile)) - allFiles.add(TempletonUtils.hadoopFsFilename - (srcFile, appConf, runAs)); - if (TempletonUtils.isset(otherFiles)) { - String[] ofs = TempletonUtils.hadoopFsListAsArray(otherFiles, appConf, runAs); - allFiles.addAll(Arrays.asList(ofs)); - } - - args.addAll(makeLauncherArgs(appConf, statusdir, completedUrl, allFiles)); - args.add("-archives"); - args.add(appConf.pigArchive()); - - args.add("--"); - args.add(appConf.pigPath()); - //the token file location should be first argument of pig - args.add("-D" + TempletonControllerJob.TOKEN_FILE_ARG_PLACEHOLDER); - - args.addAll(pigArgs); - if (TempletonUtils.isset(execute)) { - args.add("-execute"); - args.add(execute); - } else if (TempletonUtils.isset(srcFile)) { - args.add("-file"); - args.add(TempletonUtils.hadoopFsPath(srcFile, appConf, runAs) - .getName()); - } - } catch (FileNotFoundException e) { - throw new BadParam(e.getMessage()); - } catch (URISyntaxException e) { - throw new BadParam(e.getMessage()); - } - - return args; - } -} diff --git 
hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/ProxyUserSupport.java hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/ProxyUserSupport.java deleted file mode 100644 index 5aa032b..0000000 --- hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/ProxyUserSupport.java +++ /dev/null @@ -1,241 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.templeton; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.security.Groups; - -import java.io.IOException; -import java.net.InetAddress; -import java.net.UnknownHostException; -import java.text.MessageFormat; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; - -/** - * When WebHCat is run with doAs query parameter this class ensures that user making the - * call is allowed to impersonate doAs user and is making a call from authorized host. 
- */ -final class ProxyUserSupport { - private static final Log LOG = LogFactory.getLog(ProxyUserSupport.class); - private static final String CONF_PROXYUSER_PREFIX = "webhcat.proxyuser."; - private static final String CONF_GROUPS_SUFFIX = ".groups"; - private static final String CONF_HOSTS_SUFFIX = ".hosts"; - private static final Set WILD_CARD = Collections.unmodifiableSet(new HashSet(0)); - private static final Map> proxyUserGroups = new HashMap>(); - private static final Map> proxyUserHosts = new HashMap>(); - - static void processProxyuserConfig(AppConfig conf) { - for(Map.Entry confEnt : conf) { - if(confEnt.getKey().startsWith(CONF_PROXYUSER_PREFIX) - && confEnt.getKey().endsWith(CONF_GROUPS_SUFFIX)) { - //process user groups for which doAs is authorized - String proxyUser = - confEnt.getKey().substring(CONF_PROXYUSER_PREFIX.length(), - confEnt.getKey().lastIndexOf(CONF_GROUPS_SUFFIX)); - Set groups; - if("*".equals(confEnt.getValue())) { - groups = WILD_CARD; - if(LOG.isDebugEnabled()) { - LOG.debug("User [" + proxyUser + "] is authorized to do doAs any user."); - } - } - else if(confEnt.getValue() != null && confEnt.getValue().trim().length() > 0) { - groups = new HashSet(Arrays.asList(confEnt.getValue().trim().split(","))); - if(LOG.isDebugEnabled()) { - LOG.debug("User [" + proxyUser + - "] is authorized to do doAs for users in the following groups: [" - + confEnt.getValue().trim() + "]"); - } - } - else { - groups = Collections.emptySet(); - if(LOG.isDebugEnabled()) { - LOG.debug("User [" + proxyUser + - "] is authorized to do doAs for users in the following groups: []"); - } - } - proxyUserGroups.put(proxyUser, groups); - } - else if(confEnt.getKey().startsWith(CONF_PROXYUSER_PREFIX) - && confEnt.getKey().endsWith(CONF_HOSTS_SUFFIX)) { - //process hosts from which doAs requests are authorized - String proxyUser = confEnt.getKey().substring(CONF_PROXYUSER_PREFIX.length(), - confEnt.getKey().lastIndexOf(CONF_HOSTS_SUFFIX)); - Set hosts; - 
if("*".equals(confEnt.getValue())) { - hosts = WILD_CARD; - if(LOG.isDebugEnabled()) { - LOG.debug("User [" + proxyUser + "] is authorized to do doAs from any host."); - } - } - else if(confEnt.getValue() != null && confEnt.getValue().trim().length() > 0) { - String[] hostValues = confEnt.getValue().trim().split(","); - hosts = new HashSet(); - for(String hostname : hostValues) { - String nhn = normalizeHostname(hostname); - if(nhn != null) { - hosts.add(nhn); - } - } - if(LOG.isDebugEnabled()) { - LOG.debug("User [" + proxyUser + - "] is authorized to do doAs from the following hosts: [" - + confEnt.getValue().trim() + "]"); - } - } - else { - hosts = Collections.emptySet(); - if(LOG.isDebugEnabled()) { - LOG.debug("User [" + proxyUser - + "] is authorized to do doAs from the following hosts: []"); - } - } - proxyUserHosts.put(proxyUser, hosts); - } - } - } - /** - * Verifies a that proxyUser is making the request from authorized host and that doAs user - * belongs to one of the groups for which proxyUser is allowed to impersonate users. - * - * @param proxyUser user name of the proxy (logged in) user. - * @param proxyHost host the proxy user is making the request from. - * @param doAsUser user the proxy user is impersonating. - * @throws NotAuthorizedException thrown if the user is not allowed to perform the proxyuser request. 
- */ - static void validate(String proxyUser, String proxyHost, String doAsUser) throws - NotAuthorizedException { - assertNotEmpty(proxyUser, "proxyUser", - "If you're attempting to use user-impersonation via a proxy user, please make sure that " - + CONF_PROXYUSER_PREFIX + "#USER#" + CONF_HOSTS_SUFFIX + " and " - + CONF_PROXYUSER_PREFIX + "#USER#" + CONF_GROUPS_SUFFIX - + " are configured correctly"); - assertNotEmpty(proxyHost, "proxyHost", - "If you're attempting to use user-impersonation via a proxy user, please make sure that " - + CONF_PROXYUSER_PREFIX + proxyUser + CONF_HOSTS_SUFFIX + " and " - + CONF_PROXYUSER_PREFIX + proxyUser + CONF_GROUPS_SUFFIX - + " are configured correctly"); - assertNotEmpty(doAsUser, Server.DO_AS_PARAM); - LOG.debug(MessageFormat.format("Authorization check proxyuser [{0}] host [{1}] doAs [{2}]", - proxyUser, proxyHost, doAsUser)); - if (proxyUserHosts.containsKey(proxyUser)) { - proxyHost = normalizeHostname(proxyHost); - validateRequestorHost(proxyUser, proxyHost); - validateGroup(proxyUser, doAsUser); - } - else { - throw new NotAuthorizedException(MessageFormat.format( - "User [{0}] not defined as proxyuser", proxyUser)); - } - } - - private static void validateRequestorHost(String proxyUser, String hostname) throws - NotAuthorizedException { - Set validHosts = proxyUserHosts.get(proxyUser); - if (validHosts == WILD_CARD) { - return; - } - if (validHosts == null || !validHosts.contains(hostname)) { - throw new NotAuthorizedException(MessageFormat.format( - "Unauthorized host [{0}] for proxyuser [{1}]", hostname, proxyUser)); - } - } - - private static void validateGroup(String proxyUser, String doAsUser) throws - NotAuthorizedException { - Set validGroups = proxyUserGroups.get(proxyUser); - if(validGroups == WILD_CARD) { - return; - } - else if(validGroups == null || validGroups.isEmpty()) { - throw new NotAuthorizedException( - MessageFormat.format( - "Unauthorized proxyuser [{0}] for doAsUser [{1}], not in proxyuser groups", 
- proxyUser, doAsUser)); - } - Groups groupsInfo = new Groups(Main.getAppConfigInstance()); - try { - List userGroups = groupsInfo.getGroups(doAsUser); - for (String g : validGroups) { - if (userGroups.contains(g)) { - return; - } - } - } - catch (IOException ex) {//thrown, for example, if there is no such user on the system - LOG.warn(MessageFormat.format("Unable to get list of groups for doAsUser [{0}].", - doAsUser), ex); - } - throw new NotAuthorizedException( - MessageFormat.format( - "Unauthorized proxyuser [{0}] for doAsUser [{1}], not in proxyuser groups", - proxyUser, doAsUser)); - } - - private static String normalizeHostname(String name) { - try { - InetAddress address = InetAddress.getByName( - "localhost".equalsIgnoreCase(name) ? null : name); - return address.getCanonicalHostName(); - } - catch (UnknownHostException ex) { - LOG.warn(MessageFormat.format("Unable to normalize hostname [{0}]", name)); - return null; - } - } - /** - * Check that a string is not null and not empty. If null or empty - * throws an IllegalArgumentException. - * - * @param str value. - * @param name parameter name for the exception message. - * @return the given value. - */ - private static String assertNotEmpty(String str, String name) { - return assertNotEmpty(str, name, null); - } - - /** - * Check that a string is not null and not empty. If null or empty - * throws an IllegalArgumentException. - * - * @param str value. - * @param name parameter name for the exception message. - * @param info additional information to be printed with the exception message - * @return the given value. - */ - private static String assertNotEmpty(String str, String name, String info) { - if (str == null) { - throw new IllegalArgumentException( - name + " cannot be null" + (info == null ? "" : ", " + info)); - } - if (str.length() == 0) { - throw new IllegalArgumentException( - name + " cannot be empty" + (info == null ? 
"" : ", " + info)); - } - return str; - } -} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/QueueException.java hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/QueueException.java deleted file mode 100644 index 8fd09e1..0000000 --- hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/QueueException.java +++ /dev/null @@ -1,31 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.templeton; - -import org.eclipse.jetty.http.HttpStatus; - -/** - * Unable to queue the job - */ -public class QueueException extends SimpleWebException { - public QueueException(String msg) { - super(HttpStatus.INTERNAL_SERVER_ERROR_500, msg); - } - -} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/QueueStatusBean.java hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/QueueStatusBean.java deleted file mode 100644 index d1c20a0..0000000 --- hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/QueueStatusBean.java +++ /dev/null @@ -1,67 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.templeton; - -import java.io.IOException; - -import org.apache.hadoop.mapred.JobStatus; -import org.apache.hadoop.mapred.JobProfile; -import org.apache.hcatalog.templeton.tool.JobState; - -/** - * QueueStatusBean - The results of an exec call. - */ -public class QueueStatusBean { - public JobStatus status; - public JobProfile profile; - - public String id; - public String parentId; - public String percentComplete; - public Long exitValue; - public String user; - public String callback; - public String completed; - - public QueueStatusBean() { - } - - /** - * Create a new QueueStatusBean - * - * @param state store job state - * @param status job status - * @param profile job profile - */ - public QueueStatusBean(JobState state, JobStatus status, JobProfile profile) - throws IOException { - this.status = status; - this.profile = profile; - - id = profile.getJobID().toString(); - parentId = state.getId(); - if (id.equals(parentId)) - parentId = null; - percentComplete = state.getPercentComplete(); - exitValue = state.getExitValue(); - user = state.getUser(); - callback = state.getCallback(); - completed = state.getCompleteStatus(); - } -} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/SecureProxySupport.java 
hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/SecureProxySupport.java deleted file mode 100644 index 7bf78c4..0000000 --- hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/SecureProxySupport.java +++ /dev/null @@ -1,191 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.templeton; - -import java.io.File; -import java.io.IOException; -import java.security.PrivilegedExceptionAction; -import java.util.List; -import java.util.Map; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.io.Text; -import org.apache.thrift.TException; -import org.apache.hadoop.security.Credentials; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hadoop.security.token.Token; - -/** - * Helper class to run jobs using Kerberos security. 
Always safe to - * use these methods, it's a noop if security is not enabled. - */ -public class SecureProxySupport { - private Path tokenPath; - private final String HCAT_SERVICE = "hcat"; - private boolean isEnabled; - private String user; - - public SecureProxySupport() { - isEnabled = UserGroupInformation.isSecurityEnabled(); - } - - private static final Log LOG = LogFactory.getLog(SecureProxySupport.class); - - /** - * The file where we store the auth token - */ - public Path getTokenPath() { - return (tokenPath); - } - - /** - * The token to pass to hcat. - */ - public String getHcatServiceStr() { - return (HCAT_SERVICE); - } - - /** - * Create the delegation token. - */ - public Path open(String user, Configuration conf) - throws IOException, InterruptedException { - close(); - if (isEnabled) { - this.user = user; - File t = File.createTempFile("templeton", null); - tokenPath = new Path(t.toURI()); - Token fsToken = getFSDelegationToken(user, conf); - String hcatTokenStr; - try { - hcatTokenStr = buildHcatDelegationToken(user); - } catch (Exception e) { - throw new IOException(e); - } - Token msToken = new Token(); - msToken.decodeFromUrlString(hcatTokenStr); - msToken.setService(new Text(HCAT_SERVICE)); - writeProxyDelegationTokens(fsToken, msToken, conf, user, tokenPath); - - } - return tokenPath; - } - - /** - * Cleanup - */ - public void close() { - if (tokenPath != null) { - new File(tokenPath.toUri()).delete(); - tokenPath = null; - } - } - - /** - * Add Hadoop env variables. - */ - public void addEnv(Map env) { - if (isEnabled) { - env.put(UserGroupInformation.HADOOP_TOKEN_FILE_LOCATION, - getTokenPath().toUri().getPath()); - } - } - - /** - * Add hcat args. 
- */ - public void addArgs(List args) { - if (isEnabled) { - args.add("-D"); - args.add("hive.metastore.token.signature=" + getHcatServiceStr()); - args.add("-D"); - args.add("proxy.user.name=" + user); - } - } - - class TokenWrapper { - Token token; - } - - private Token getFSDelegationToken(String user, - final Configuration conf) - throws IOException, InterruptedException { - LOG.info("user: " + user + " loginUser: " + UserGroupInformation.getLoginUser().getUserName()); - final UserGroupInformation ugi = UgiFactory.getUgi(user); - - final TokenWrapper twrapper = new TokenWrapper(); - ugi.doAs(new PrivilegedExceptionAction() { - public Object run() throws IOException { - FileSystem fs = FileSystem.get(conf); - twrapper.token = fs.getDelegationToken(ugi.getShortUserName()); - return null; - } - }); - return twrapper.token; - - } - - private void writeProxyDelegationTokens(final Token fsToken, - final Token msToken, - final Configuration conf, - String user, - final Path tokenPath) - throws IOException, InterruptedException { - - - LOG.info("user: " + user + " loginUser: " + UserGroupInformation.getLoginUser().getUserName()); - final UserGroupInformation ugi = UgiFactory.getUgi(user); - - - ugi.doAs(new PrivilegedExceptionAction() { - public Object run() throws IOException { - Credentials cred = new Credentials(); - cred.addToken(fsToken.getService(), fsToken); - cred.addToken(msToken.getService(), msToken); - cred.writeTokenStorageFile(tokenPath, conf); - return null; - } - }); - - } - - private String buildHcatDelegationToken(String user) - throws IOException, InterruptedException, MetaException, TException { - HiveConf c = new HiveConf(); - final HiveMetaStoreClient client = new HiveMetaStoreClient(c); - LOG.info("user: " + user + " loginUser: " + UserGroupInformation.getLoginUser().getUserName()); - final TokenWrapper twrapper = new TokenWrapper(); - final UserGroupInformation ugi = UgiFactory.getUgi(user); - String s = ugi.doAs(new PrivilegedExceptionAction() 
{ - public String run() - throws IOException, MetaException, TException { - String u = ugi.getUserName(); - return client.getDelegationToken(u); - } - }); - return s; - } -} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/Server.java hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/Server.java deleted file mode 100644 index 46bbc07..0000000 --- hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/Server.java +++ /dev/null @@ -1,856 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.hcatalog.templeton; - -import java.io.IOException; -import java.net.InetAddress; -import java.net.UnknownHostException; -import java.text.MessageFormat; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.regex.Matcher; -import java.util.regex.Pattern; -import javax.servlet.http.HttpServletRequest; -import javax.ws.rs.DELETE; -import javax.ws.rs.FormParam; -import javax.ws.rs.GET; -import javax.ws.rs.POST; -import javax.ws.rs.PUT; -import javax.ws.rs.Path; -import javax.ws.rs.PathParam; -import javax.ws.rs.Produces; -import javax.ws.rs.QueryParam; -import javax.ws.rs.core.Context; -import javax.ws.rs.core.MediaType; -import javax.ws.rs.core.Response; -import javax.ws.rs.core.SecurityContext; -import javax.ws.rs.core.UriInfo; - -import org.apache.commons.exec.ExecuteException; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hadoop.security.authentication.client.PseudoAuthenticator; -import org.apache.hcatalog.templeton.tool.TempletonUtils; - -/** - * The Templeton Web API server. - */ -@Path("/v1") -public class Server { - public static final String VERSION = "v1"; - public static final String DO_AS_PARAM = "doAs"; - - /** - * The status message. Always "ok" - */ - public static final Map STATUS_OK = createStatusMsg(); - - /** - * The list of supported api versions. - */ - public static final Map SUPPORTED_VERSIONS = createVersions(); - - /** - * The list of supported return formats. Always json. - */ - public static final Map SUPPORTED_FORMATS = createFormats(); - - // Build the status message for the /status call. - private static Map createStatusMsg() { - HashMap res = new HashMap(); - res.put("status", "ok"); - res.put("version", VERSION); - - return Collections.unmodifiableMap(res); - } - - // Build the versions list. 
- private static Map createVersions() { - ArrayList versions = new ArrayList(); - versions.add(VERSION); - - HashMap res = new HashMap(); - res.put("supportedVersions", versions); - res.put("version", VERSION); - - return Collections.unmodifiableMap(res); - } - - // Build the supported formats list - private static Map createFormats() { - ArrayList formats = new ArrayList(); - formats.add(MediaType.APPLICATION_JSON); - HashMap res = new HashMap(); - res.put("responseTypes", formats); - - return Collections.unmodifiableMap(res); - } - - protected static ExecService execService = ExecServiceImpl.getInstance(); - private static AppConfig appConf = Main.getAppConfigInstance(); - - // The SecurityContext set by AuthFilter - private - @Context - SecurityContext theSecurityContext; - - // The uri requested - private - @Context - UriInfo theUriInfo; - private @QueryParam(DO_AS_PARAM) String doAs; - private @Context HttpServletRequest request; - - private static final Log LOG = LogFactory.getLog(Server.class); - - /** - * Check the status of this server. Always OK. - */ - @GET - @Path("status") - @Produces({MediaType.APPLICATION_JSON}) - public Map status() { - return STATUS_OK; - } - - /** - * Check the supported request formats of this server. - */ - @GET - @Produces({MediaType.APPLICATION_JSON}) - public Map requestFormats() { - return SUPPORTED_FORMATS; - } - - /** - * Check the version(s) supported by this server. - */ - @GET - @Path("version") - @Produces({MediaType.APPLICATION_JSON}) - public Map version() { - return SUPPORTED_VERSIONS; - } - - /** - * Execute an hcat ddl expression on the local box. It is run - * as the authenticated user and rate limited. 
- */ - @POST - @Path("ddl") - @Produces({MediaType.APPLICATION_JSON}) - public ExecBean ddl(@FormParam("exec") String exec, - @FormParam("group") String group, - @FormParam("permissions") String permissions) - throws NotAuthorizedException, BusyException, BadParam, - ExecuteException, IOException { - verifyUser(); - verifyParam(exec, "exec"); - - HcatDelegator d = new HcatDelegator(appConf, execService); - return d.run(getDoAsUser(), exec, false, group, permissions); - } - - /** - * List all the tables in an hcat database. - */ - @GET - @Path("ddl/database/{db}/table") - @Produces(MediaType.APPLICATION_JSON) - public Response listTables(@PathParam("db") String db, - @QueryParam("like") String tablePattern) - throws HcatException, NotAuthorizedException, BusyException, - BadParam, ExecuteException, IOException { - verifyUser(); - verifyDdlParam(db, ":db"); - - HcatDelegator d = new HcatDelegator(appConf, execService); - if (!TempletonUtils.isset(tablePattern)) - tablePattern = "*"; - return d.listTables(getDoAsUser(), db, tablePattern); - } - - /** - * Create a new table. - */ - @PUT - @Path("ddl/database/{db}/table/{table}") - @Produces(MediaType.APPLICATION_JSON) - public Response createTable(@PathParam("db") String db, - @PathParam("table") String table, - TableDesc desc) - throws SimpleWebException, NotAuthorizedException, BusyException, - BadParam, ExecuteException, IOException { - verifyUser(); - verifyDdlParam(db, ":db"); - verifyDdlParam(table, ":table"); - desc.table = table; - - HcatDelegator d = new HcatDelegator(appConf, execService); - return d.createTable(getDoAsUser(), db, desc); - } - - /** - * Create a new table like another table. 
- */ - @PUT - @Path("ddl/database/{db}/table/{existingTable}/like/{newTable}") - @Produces(MediaType.APPLICATION_JSON) - public Response createTableLike(@PathParam("db") String db, - @PathParam("existingTable") String existingTable, - @PathParam("newTable") String newTable, - TableLikeDesc desc) - throws SimpleWebException, NotAuthorizedException, BusyException, - BadParam, ExecuteException, IOException { - verifyUser(); - verifyDdlParam(db, ":db"); - verifyDdlParam(existingTable, ":existingTable"); - verifyDdlParam(newTable, ":newTable"); - desc.existingTable = existingTable; - desc.newTable = newTable; - - HcatDelegator d = new HcatDelegator(appConf, execService); - return d.createTableLike(getDoAsUser(), db, desc); - } - - /** - * Describe an hcat table. This is normally a simple list of - * columns (using "desc table"), but the extended format will show - * more information (using "show table extended like"). - */ - @GET - @Path("ddl/database/{db}/table/{table}") - @Produces(MediaType.APPLICATION_JSON) - public Response descTable(@PathParam("db") String db, - @PathParam("table") String table, - @QueryParam("format") String format) - throws HcatException, NotAuthorizedException, BusyException, - BadParam, ExecuteException, IOException { - verifyUser(); - verifyDdlParam(db, ":db"); - verifyDdlParam(table, ":table"); - - HcatDelegator d = new HcatDelegator(appConf, execService); - if ("extended".equals(format)) - return d.descExtendedTable(getDoAsUser(), db, table); - else - return d.descTable(getDoAsUser(), db, table, false); - } - - /** - * Drop an hcat table. 
- */ - @DELETE - @Path("ddl/database/{db}/table/{table}") - @Produces(MediaType.APPLICATION_JSON) - public Response dropTable(@PathParam("db") String db, - @PathParam("table") String table, - @QueryParam("ifExists") boolean ifExists, - @QueryParam("group") String group, - @QueryParam("permissions") String permissions) - throws HcatException, NotAuthorizedException, BusyException, - BadParam, ExecuteException, IOException { - verifyUser(); - verifyDdlParam(db, ":db"); - verifyDdlParam(table, ":table"); - - HcatDelegator d = new HcatDelegator(appConf, execService); - return d.dropTable(getDoAsUser(), db, table, ifExists, group, permissions); - } - - /** - * Rename an hcat table. - */ - @POST - @Path("ddl/database/{db}/table/{table}") - @Produces(MediaType.APPLICATION_JSON) - public Response renameTable(@PathParam("db") String db, - @PathParam("table") String oldTable, - @FormParam("rename") String newTable, - @FormParam("group") String group, - @FormParam("permissions") String permissions) - throws HcatException, NotAuthorizedException, BusyException, - BadParam, ExecuteException, IOException { - verifyUser(); - verifyDdlParam(db, ":db"); - verifyDdlParam(oldTable, ":table"); - verifyDdlParam(newTable, "rename"); - - HcatDelegator d = new HcatDelegator(appConf, execService); - return d.renameTable(getDoAsUser(), db, oldTable, newTable, group, permissions); - } - - /** - * Describe a single property on an hcat table. 
- */ - @GET - @Path("ddl/database/{db}/table/{table}/property/{property}") - @Produces(MediaType.APPLICATION_JSON) - public Response descOneTableProperty(@PathParam("db") String db, - @PathParam("table") String table, - @PathParam("property") String property) - throws HcatException, NotAuthorizedException, BusyException, - BadParam, ExecuteException, IOException { - verifyUser(); - verifyDdlParam(db, ":db"); - verifyDdlParam(table, ":table"); - verifyDdlParam(property, ":property"); - - HcatDelegator d = new HcatDelegator(appConf, execService); - return d.descTableProperty(getDoAsUser(), db, table, property); - } - - /** - * List all the properties on an hcat table. - */ - @GET - @Path("ddl/database/{db}/table/{table}/property") - @Produces(MediaType.APPLICATION_JSON) - public Response listTableProperties(@PathParam("db") String db, - @PathParam("table") String table) - throws HcatException, NotAuthorizedException, BusyException, - BadParam, ExecuteException, IOException { - verifyUser(); - verifyDdlParam(db, ":db"); - verifyDdlParam(table, ":table"); - - HcatDelegator d = new HcatDelegator(appConf, execService); - return d.listTableProperties(getDoAsUser(), db, table); - } - - /** - * Add a single property on an hcat table. - */ - @PUT - @Path("ddl/database/{db}/table/{table}/property/{property}") - @Produces(MediaType.APPLICATION_JSON) - public Response addOneTableProperty(@PathParam("db") String db, - @PathParam("table") String table, - @PathParam("property") String property, - TablePropertyDesc desc) - throws HcatException, NotAuthorizedException, BusyException, - BadParam, ExecuteException, IOException { - verifyUser(); - verifyDdlParam(db, ":db"); - verifyDdlParam(table, ":table"); - verifyDdlParam(property, ":property"); - desc.name = property; - - HcatDelegator d = new HcatDelegator(appConf, execService); - return d.addOneTableProperty(getDoAsUser(), db, table, desc); - } - - /** - * List all the partitions in an hcat table. 
- */ - @GET - @Path("ddl/database/{db}/table/{table}/partition") - @Produces(MediaType.APPLICATION_JSON) - public Response listPartitions(@PathParam("db") String db, - @PathParam("table") String table) - throws HcatException, NotAuthorizedException, BusyException, - BadParam, ExecuteException, IOException { - verifyUser(); - verifyDdlParam(db, ":db"); - verifyDdlParam(table, ":table"); - - HcatDelegator d = new HcatDelegator(appConf, execService); - return d.listPartitions(getDoAsUser(), db, table); - } - - /** - * Describe a single partition in an hcat table. - */ - @GET - @Path("ddl/database/{db}/table/{table}/partition/{partition}") - @Produces(MediaType.APPLICATION_JSON) - public Response descPartition(@PathParam("db") String db, - @PathParam("table") String table, - @PathParam("partition") String partition) - throws HcatException, NotAuthorizedException, BusyException, - BadParam, ExecuteException, IOException { - verifyUser(); - verifyDdlParam(db, ":db"); - verifyDdlParam(table, ":table"); - verifyParam(partition, ":partition"); - - HcatDelegator d = new HcatDelegator(appConf, execService); - return d.descOnePartition(getDoAsUser(), db, table, partition); - } - - /** - * Create a partition in an hcat table. - */ - @PUT - @Path("ddl/database/{db}/table/{table}/partition/{partition}") - @Produces(MediaType.APPLICATION_JSON) - public Response addOnePartition(@PathParam("db") String db, - @PathParam("table") String table, - @PathParam("partition") String partition, - PartitionDesc desc) - throws HcatException, NotAuthorizedException, BusyException, - BadParam, ExecuteException, IOException { - verifyUser(); - verifyDdlParam(db, ":db"); - verifyDdlParam(table, ":table"); - verifyParam(partition, ":partition"); - desc.partition = partition; - HcatDelegator d = new HcatDelegator(appConf, execService); - return d.addOnePartition(getDoAsUser(), db, table, desc); - } - - /** - * Drop a partition in an hcat table. 
- */ - @DELETE - @Path("ddl/database/{db}/table/{table}/partition/{partition}") - @Produces(MediaType.APPLICATION_JSON) - public Response dropPartition(@PathParam("db") String db, - @PathParam("table") String table, - @PathParam("partition") String partition, - @QueryParam("ifExists") boolean ifExists, - @QueryParam("group") String group, - @QueryParam("permissions") String permissions) - throws HcatException, NotAuthorizedException, BusyException, - BadParam, ExecuteException, IOException { - verifyUser(); - verifyDdlParam(db, ":db"); - verifyDdlParam(table, ":table"); - verifyParam(partition, ":partition"); - HcatDelegator d = new HcatDelegator(appConf, execService); - return d.dropPartition(getDoAsUser(), db, table, partition, ifExists, - group, permissions); - } - - /** - * List all databases, or those that match a pattern. - */ - @GET - @Path("ddl/database/") - @Produces(MediaType.APPLICATION_JSON) - public Response listDatabases(@QueryParam("like") String dbPattern) - throws HcatException, NotAuthorizedException, BusyException, - BadParam, ExecuteException, IOException { - verifyUser(); - - HcatDelegator d = new HcatDelegator(appConf, execService); - if (!TempletonUtils.isset(dbPattern)) - dbPattern = "*"; - return d.listDatabases(getDoAsUser(), dbPattern); - } - - /** - * Describe a database - */ - @GET - @Path("ddl/database/{db}") - @Produces(MediaType.APPLICATION_JSON) - public Response descDatabase(@PathParam("db") String db, - @QueryParam("format") String format) - throws HcatException, NotAuthorizedException, BusyException, - BadParam, ExecuteException, IOException { - verifyUser(); - verifyDdlParam(db, ":db"); - HcatDelegator d = new HcatDelegator(appConf, execService); - return d.descDatabase(getDoAsUser(), db, "extended".equals(format)); - } - - /** - * Create a database - */ - @PUT - @Path("ddl/database/{db}") - @Produces(MediaType.APPLICATION_JSON) - public Response createDatabase(@PathParam("db") String db, - DatabaseDesc desc) - throws 
HcatException, NotAuthorizedException, BusyException, - BadParam, ExecuteException, IOException { - verifyUser(); - verifyDdlParam(db, ":db"); - desc.database = db; - HcatDelegator d = new HcatDelegator(appConf, execService); - return d.createDatabase(getDoAsUser(), desc); - } - - /** - * Drop a database - */ - @DELETE - @Path("ddl/database/{db}") - @Produces(MediaType.APPLICATION_JSON) - public Response dropDatabase(@PathParam("db") String db, - @QueryParam("ifExists") boolean ifExists, - @QueryParam("option") String option, - @QueryParam("group") String group, - @QueryParam("permissions") String permissions) - throws HcatException, NotAuthorizedException, BusyException, - BadParam, ExecuteException, IOException { - verifyUser(); - verifyDdlParam(db, ":db"); - if (TempletonUtils.isset(option)) - verifyDdlParam(option, "option"); - HcatDelegator d = new HcatDelegator(appConf, execService); - return d.dropDatabase(getDoAsUser(), db, ifExists, option, - group, permissions); - } - - /** - * List the columns in an hcat table. Currently the same as - * describe table. - */ - @GET - @Path("ddl/database/{db}/table/{table}/column") - @Produces(MediaType.APPLICATION_JSON) - public Response listColumns(@PathParam("db") String db, - @PathParam("table") String table) - throws HcatException, NotAuthorizedException, BusyException, - BadParam, ExecuteException, IOException { - verifyUser(); - verifyDdlParam(db, ":db"); - verifyDdlParam(table, ":table"); - - HcatDelegator d = new HcatDelegator(appConf, execService); - return d.listColumns(getDoAsUser(), db, table); - } - - /** - * Describe a single column in an hcat table. 
- */ - @GET - @Path("ddl/database/{db}/table/{table}/column/{column}") - @Produces(MediaType.APPLICATION_JSON) - public Response descColumn(@PathParam("db") String db, - @PathParam("table") String table, - @PathParam("column") String column) - throws SimpleWebException, NotAuthorizedException, BusyException, - BadParam, ExecuteException, IOException { - verifyUser(); - verifyDdlParam(db, ":db"); - verifyDdlParam(table, ":table"); - verifyParam(column, ":column"); - - HcatDelegator d = new HcatDelegator(appConf, execService); - return d.descOneColumn(getDoAsUser(), db, table, column); - } - - /** - * Create a column in an hcat table. - */ - @PUT - @Path("ddl/database/{db}/table/{table}/column/{column}") - @Produces(MediaType.APPLICATION_JSON) - public Response addOneColumn(@PathParam("db") String db, - @PathParam("table") String table, - @PathParam("column") String column, - ColumnDesc desc) - throws HcatException, NotAuthorizedException, BusyException, - BadParam, ExecuteException, IOException { - verifyUser(); - verifyDdlParam(db, ":db"); - verifyDdlParam(table, ":table"); - verifyParam(column, ":column"); - verifyParam(desc.type, "type"); - desc.name = column; - - HcatDelegator d = new HcatDelegator(appConf, execService); - return d.addOneColumn(getDoAsUser(), db, table, desc); - } - - /** - * Run a MapReduce Streaming job. 
- */ - @POST - @Path("mapreduce/streaming") - @Produces({MediaType.APPLICATION_JSON}) - public EnqueueBean mapReduceStreaming(@FormParam("input") List inputs, - @FormParam("output") String output, - @FormParam("mapper") String mapper, - @FormParam("reducer") String reducer, - @FormParam("file") List files, - @FormParam("define") List defines, - @FormParam("cmdenv") List cmdenvs, - @FormParam("arg") List args, - @FormParam("statusdir") String statusdir, - @FormParam("callback") String callback) - throws NotAuthorizedException, BusyException, BadParam, QueueException, - ExecuteException, IOException, InterruptedException { - verifyUser(); - verifyParam(inputs, "input"); - verifyParam(mapper, "mapper"); - verifyParam(reducer, "reducer"); - - StreamingDelegator d = new StreamingDelegator(appConf); - return d.run(getDoAsUser(), inputs, output, mapper, reducer, - files, defines, cmdenvs, args, - statusdir, callback, getCompletedUrl()); - } - - /** - * Run a MapReduce Jar job. - */ - @POST - @Path("mapreduce/jar") - @Produces({MediaType.APPLICATION_JSON}) - public EnqueueBean mapReduceJar(@FormParam("jar") String jar, - @FormParam("class") String mainClass, - @FormParam("libjars") String libjars, - @FormParam("files") String files, - @FormParam("arg") List args, - @FormParam("define") List defines, - @FormParam("statusdir") String statusdir, - @FormParam("callback") String callback) - throws NotAuthorizedException, BusyException, BadParam, QueueException, - ExecuteException, IOException, InterruptedException { - verifyUser(); - verifyParam(jar, "jar"); - verifyParam(mainClass, "class"); - - JarDelegator d = new JarDelegator(appConf); - return d.run(getDoAsUser(), - jar, mainClass, - libjars, files, args, defines, - statusdir, callback, getCompletedUrl()); - } - - /** - * Run a Pig job. 
- */ - @POST - @Path("pig") - @Produces({MediaType.APPLICATION_JSON}) - public EnqueueBean pig(@FormParam("execute") String execute, - @FormParam("file") String srcFile, - @FormParam("arg") List pigArgs, - @FormParam("files") String otherFiles, - @FormParam("statusdir") String statusdir, - @FormParam("callback") String callback) - throws NotAuthorizedException, BusyException, BadParam, QueueException, - ExecuteException, IOException, InterruptedException { - verifyUser(); - if (execute == null && srcFile == null) - throw new BadParam("Either execute or file parameter required"); - - PigDelegator d = new PigDelegator(appConf); - return d.run(getDoAsUser(), - execute, srcFile, - pigArgs, otherFiles, - statusdir, callback, getCompletedUrl()); - } - - /** - * Run a Hive job. - */ - @POST - @Path("hive") - @Produces({MediaType.APPLICATION_JSON}) - public EnqueueBean hive(@FormParam("execute") String execute, - @FormParam("file") String srcFile, - @FormParam("define") List defines, - @FormParam("statusdir") String statusdir, - @FormParam("callback") String callback) - throws NotAuthorizedException, BusyException, BadParam, QueueException, - ExecuteException, IOException, InterruptedException { - verifyUser(); - if (execute == null && srcFile == null) - throw new BadParam("Either execute or file parameter required"); - - HiveDelegator d = new HiveDelegator(appConf); - return d.run(getDoAsUser(), execute, srcFile, defines, - statusdir, callback, getCompletedUrl()); - } - - /** - * Return the status of the jobid. - */ - @GET - @Path("queue/{jobid}") - @Produces({MediaType.APPLICATION_JSON}) - public QueueStatusBean showQueueId(@PathParam("jobid") String jobid) - throws NotAuthorizedException, BadParam, IOException, InterruptedException { - - verifyUser(); - verifyParam(jobid, ":jobid"); - - StatusDelegator d = new StatusDelegator(appConf); - return d.run(getDoAsUser(), jobid); - } - - /** - * Kill a job in the queue. 
- */ - @DELETE - @Path("queue/{jobid}") - @Produces({MediaType.APPLICATION_JSON}) - public QueueStatusBean deleteQueueId(@PathParam("jobid") String jobid) - throws NotAuthorizedException, BadParam, IOException, InterruptedException { - - verifyUser(); - verifyParam(jobid, ":jobid"); - - DeleteDelegator d = new DeleteDelegator(appConf); - return d.run(getDoAsUser(), jobid); - } - - /** - * Return all the known job ids for this user. - */ - @GET - @Path("queue") - @Produces({MediaType.APPLICATION_JSON}) - public List showQueueList(@QueryParam("showall") boolean showall) - throws NotAuthorizedException, BadParam, IOException, InterruptedException { - - verifyUser(); - - ListDelegator d = new ListDelegator(appConf); - return d.run(getDoAsUser(), showall); - } - - /** - * Notify on a completed job. - */ - @GET - @Path("internal/complete/{jobid}") - @Produces({MediaType.APPLICATION_JSON}) - public CompleteBean completeJob(@PathParam("jobid") String jobid) - throws CallbackFailedException, IOException { - CompleteDelegator d = new CompleteDelegator(appConf); - return d.run(jobid); - } - - /** - * Verify that we have a valid user. Throw an exception if invalid. - */ - public void verifyUser() throws NotAuthorizedException { - String requestingUser = getRequestingUser(); - if (requestingUser == null) { - String msg = "No user found."; - if (!UserGroupInformation.isSecurityEnabled()) - msg += " Missing " + PseudoAuthenticator.USER_NAME + " parameter."; - throw new NotAuthorizedException(msg); - } - if(doAs != null && !doAs.equals(requestingUser)) { - /*if doAs user is different than logged in user, need to check that - that logged in user is authorized to run as 'doAs'*/ - ProxyUserSupport.validate(requestingUser, getRequestingHost(requestingUser, request), doAs); - } - } - /** - * All 'tasks' spawned by WebHCat should be run as this user. 
W/o doAs query parameter - * this is just the user making the request (or - * {@link org.apache.hadoop.security.authentication.client.PseudoAuthenticator#USER_NAME} - * query param). - * @return value of doAs query parameter or {@link #getRequestingUser()} - */ - private String getDoAsUser() { - return doAs != null && !doAs.equals(getRequestingUser()) ? doAs : getRequestingUser(); - } - /** - * Verify that the parameter exists. Throw an exception if invalid. - */ - public void verifyParam(String param, String name) - throws BadParam { - if (param == null) - throw new BadParam("Missing " + name + " parameter"); - } - - /** - * Verify that the parameter exists. Throw an exception if invalid. - */ - public void verifyParam(List param, String name) - throws BadParam { - if (param == null || param.isEmpty()) - throw new BadParam("Missing " + name + " parameter"); - } - - public static final Pattern DDL_ID = Pattern.compile("[a-zA-Z]\\w*"); - - /** - * Verify that the parameter exists and is a simple DDL identifier - * name. Throw an exception if invalid. - * - * Bug: This needs to allow for quoted ddl identifiers. - */ - public void verifyDdlParam(String param, String name) - throws BadParam { - verifyParam(param, name); - Matcher m = DDL_ID.matcher(param); - if (!m.matches()) - throw new BadParam("Invalid DDL identifier " + name); - } - /** - * Get the user name from the security context, i.e. the user making the HTTP request. - * With simple/pseudo security mode this should return the - * value of user.name query param, in kerberos mode it's the kinit'ed user. 
- */ - private String getRequestingUser() { - if (theSecurityContext == null) - return null; - if (theSecurityContext.getUserPrincipal() == null) - return null; - //map hue/foo.bar@something.com->hue since user group checks - // and config files are in terms of short name - return UserGroupInformation.createRemoteUser( - theSecurityContext.getUserPrincipal().getName()).getShortUserName(); - } - - /** - * The callback url on this server when a task is completed. - */ - public String getCompletedUrl() { - if (theUriInfo == null) - return null; - if (theUriInfo.getBaseUri() == null) - return null; - return theUriInfo.getBaseUri() + VERSION - + "/internal/complete/$jobId"; - } - /** - * Returns canonical host name from which the request is made; used for doAs validation - */ - private static String getRequestingHost(String requestingUser, HttpServletRequest request) { - final String unkHost = "???"; - if(request == null) { - LOG.warn("request is null; cannot determine hostname"); - return unkHost; - } - try { - String address = request.getRemoteAddr();//returns IP addr - if(address == null) { - LOG.warn(MessageFormat.format("Request remote address is NULL for user [{0}]", requestingUser)); - return unkHost; - } - - //Inet4Address/Inet6Address - String hostName = InetAddress.getByName(address).getCanonicalHostName(); - if(LOG.isDebugEnabled()) { - LOG.debug(MessageFormat.format("Resolved remote hostname: [{0}]", hostName)); - } - return hostName; - - } catch (UnknownHostException ex) { - LOG.warn(MessageFormat.format("Request remote address could not be resolved, {0}", ex.toString(), ex)); - return unkHost; - } - } -} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/SimpleExceptionMapper.java hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/SimpleExceptionMapper.java deleted file mode 100644 index 33dfab7..0000000 --- hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/SimpleExceptionMapper.java +++ /dev/null @@ 
-1,35 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.templeton; - -import javax.ws.rs.core.Response; -import javax.ws.rs.ext.ExceptionMapper; -import javax.ws.rs.ext.Provider; - -/** - * Map our exceptions to the Jersey response. This lets us have nice - * results in the error body. - */ -@Provider -public class SimpleExceptionMapper - implements ExceptionMapper { - public Response toResponse(SimpleWebException e) { - return e.getResponse(); - } -} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/SimpleWebException.java hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/SimpleWebException.java deleted file mode 100644 index f34de6e..0000000 --- hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/SimpleWebException.java +++ /dev/null @@ -1,72 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.templeton; - -import java.io.IOException; -import java.util.Map; -import java.util.HashMap; -import javax.ws.rs.core.MediaType; -import javax.ws.rs.core.Response; - -import org.codehaus.jackson.map.ObjectMapper; - -/** - * Simple exception that will return a json error payload if thrown - * from a JAX web server. We skip using WebApplicationException and - * instead map our own so that Jersey doesn't log our exceptions as - * error in the output log. See SimpleExceptionMapper. 
- */ -public class SimpleWebException extends Throwable { - public int httpCode; - public Map params; - - public SimpleWebException(int httpCode, String msg) { - super(msg); - this.httpCode = httpCode; - } - - public SimpleWebException(int httpCode, String msg, Map params) { - super(msg); - this.httpCode = httpCode; - this.params = params; - } - - public Response getResponse() { - return buildMessage(httpCode, params, getMessage()); - } - - public static Response buildMessage(int httpCode, Map params, - String msg) { - HashMap err = new HashMap(); - err.put("error", msg); - if (params != null) - err.putAll(params); - - String json = "\"error\""; - try { - json = new ObjectMapper().writeValueAsString(err); - } catch (IOException e) { - } - - return Response.status(httpCode) - .entity(json) - .type(MediaType.APPLICATION_JSON) - .build(); - } -} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/StatusDelegator.java hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/StatusDelegator.java deleted file mode 100644 index 3f3470f..0000000 --- hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/StatusDelegator.java +++ /dev/null @@ -1,111 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.templeton; - -import java.io.IOException; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.shims.HadoopShims.WebHCatJTShim; -import org.apache.hadoop.hive.shims.ShimLoader; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hadoop.mapred.JobID; -import org.apache.hadoop.mapred.JobProfile; -import org.apache.hadoop.mapred.JobStatus; -import org.apache.hcatalog.templeton.tool.JobState; - -/** - * Fetch the status of a given job id in the queue. - */ -public class StatusDelegator extends TempletonDelegator { - private static final Log LOG = LogFactory.getLog(StatusDelegator.class); - - public StatusDelegator(AppConfig appConf) { - super(appConf); - } - - public QueueStatusBean run(String user, String id) - throws NotAuthorizedException, BadParam, IOException, InterruptedException - { - WebHCatJTShim tracker = null; - JobState state = null; - try { - UserGroupInformation ugi = UgiFactory.getUgi(user); - tracker = ShimLoader.getHadoopShims().getWebHCatShim(appConf, ugi); - JobID jobid = StatusDelegator.StringToJobID(id); - if (jobid == null) - throw new BadParam("Invalid jobid: " + id); - state = new JobState(id, Main.getAppConfigInstance()); - return StatusDelegator.makeStatus(tracker, jobid, state); - } catch (IllegalStateException e) { - throw new BadParam(e.getMessage()); - } finally { - if (tracker != null) - tracker.close(); - if (state != null) - state.close(); - } - } - - public static QueueStatusBean makeStatus(WebHCatJTShim tracker, - JobID jobid, - String childid, - JobState state) - throws BadParam, IOException { - JobID bestid = jobid; - if (childid != null) - bestid = StatusDelegator.StringToJobID(childid); - - JobStatus status = tracker.getJobStatus(bestid); - JobProfile profile = tracker.getJobProfile(bestid); - - if 
(status == null || profile == null) { - if (bestid != jobid) { // Corrupt childid, retry. - LOG.error("Corrupt child id " + childid + " for " + jobid); - bestid = jobid; - status = tracker.getJobStatus(bestid); - profile = tracker.getJobProfile(bestid); - } - } - - if (status == null || profile == null) // No such job. - throw new BadParam("Could not find job " + bestid); - - return new QueueStatusBean(state, status, profile); - } - - public static QueueStatusBean makeStatus(WebHCatJTShim tracker, - JobID jobid, - JobState state) - throws BadParam, IOException { - return makeStatus(tracker, jobid, state.getChildId(), state); - } - - /** - * A version of JobID.forName with our app specific error handling. - */ - public static JobID StringToJobID(String id) - throws BadParam { - try { - return JobID.forName(id); - } catch (IllegalArgumentException e) { - throw new BadParam(e.getMessage()); - } - } -} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/StreamingDelegator.java hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/StreamingDelegator.java deleted file mode 100644 index 85557ba..0000000 --- hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/StreamingDelegator.java +++ /dev/null @@ -1,89 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.templeton; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - -import org.apache.commons.exec.ExecuteException; - -/** - * Submit a streaming job to the MapReduce queue. Really just a front - end to the JarDelegator. - * - * This is the backend of the mapreduce/streaming web service. - */ -public class StreamingDelegator extends LauncherDelegator { - public StreamingDelegator(AppConfig appConf) { - super(appConf); - } - - public EnqueueBean run(String user, - List inputs, String output, - String mapper, String reducer, - List files, List defines, - List cmdenvs, - List jarArgs, - String statusdir, - String callback, - String completedUrl) - throws NotAuthorizedException, BadParam, BusyException, QueueException, - ExecuteException, IOException, InterruptedException { - List args = makeArgs(inputs, output, mapper, reducer, - files, defines, cmdenvs, jarArgs); - - JarDelegator d = new JarDelegator(appConf); - return d.run(user, - appConf.streamingJar(), null, - null, null, args, defines, - statusdir, callback, completedUrl); - } - - private List makeArgs(List inputs, - String output, - String mapper, - String reducer, - List files, - List defines, - List cmdenvs, - List jarArgs) { - ArrayList args = new ArrayList(); - for (String input : inputs) { - args.add("-input"); - args.add(input); - } - args.add("-output"); - args.add(output); - args.add("-mapper"); - args.add(mapper); - args.add("-reducer"); - args.add(reducer); - - for (String f : files) - args.add("-file" + f); - for (String d : defines) - args.add("-D" + d); - for (String e : cmdenvs) - args.add("-cmdenv" + e); - args.addAll(jarArgs); - - return args; - } -} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/TableDesc.java hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/TableDesc.java deleted 
file mode 100644 index 2c0b42d..0000000 --- hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/TableDesc.java +++ /dev/null @@ -1,245 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.templeton; - -import java.util.List; -import java.util.Map; -import javax.xml.bind.annotation.XmlRootElement; - -/** - * A description of the table to create. 
- */ -@XmlRootElement -public class TableDesc extends GroupPermissionsDesc { - public boolean external = false; - public boolean ifNotExists = false; - public String table; - public String comment; - public List columns; - public List partitionedBy; - public ClusteredByDesc clusteredBy; - public StorageFormatDesc format; - public String location; - public Map tableProperties; - - /** - * Create a new TableDesc - */ - public TableDesc() { - } - - public String toString() { - return String.format("TableDesc(table=%s, columns=%s)", table, columns); - } - - public boolean equals(Object o) { - if (this == o) - return true; - if (!(o instanceof TableDesc)) - return false; - TableDesc that = (TableDesc) o; - return xequals(this.external, that.external) - && xequals(this.ifNotExists, that.ifNotExists) - && xequals(this.table, that.table) - && xequals(this.comment, that.comment) - && xequals(this.columns, that.columns) - && xequals(this.partitionedBy, that.partitionedBy) - && xequals(this.clusteredBy, that.clusteredBy) - && xequals(this.format, that.format) - && xequals(this.location, that.location) - && xequals(this.tableProperties, that.tableProperties) - && super.equals(that) - ; - } - - /** - * How to cluster the table. - */ - @XmlRootElement - public static class ClusteredByDesc { - public List columnNames; - public List sortedBy; - public int numberOfBuckets; - - public ClusteredByDesc() { - } - - public String toString() { - String fmt - = "ClusteredByDesc(columnNames=%s, sortedBy=%s, numberOfBuckets=%s)"; - return String.format(fmt, columnNames, sortedBy, numberOfBuckets); - } - - public boolean equals(Object o) { - if (this == o) - return true; - if (!(o instanceof ClusteredByDesc)) - return false; - ClusteredByDesc that = (ClusteredByDesc) o; - return xequals(this.columnNames, that.columnNames) - && xequals(this.sortedBy, that.sortedBy) - && xequals(this.numberOfBuckets, that.numberOfBuckets) - ; - } - } - - /** - * The clustered sort order. 
- */ - @XmlRootElement - public static class ClusterSortOrderDesc { - public String columnName; - public SortDirectionDesc order; - - public ClusterSortOrderDesc() { - } - - public ClusterSortOrderDesc(String columnName, SortDirectionDesc order) { - this.columnName = columnName; - this.order = order; - } - - public String toString() { - return String - .format("ClusterSortOrderDesc(columnName=%s, order=%s)", - columnName, order); - } - - public boolean equals(Object o) { - if (this == o) - return true; - if (!(o instanceof ClusterSortOrderDesc)) - return false; - ClusterSortOrderDesc that = (ClusterSortOrderDesc) o; - return xequals(this.columnName, that.columnName) - && xequals(this.order, that.order) - ; - } - } - - /** - * Ther ASC or DESC sort order. - */ - @XmlRootElement - public static enum SortDirectionDesc { - ASC, DESC - } - - /** - * The storage format. - */ - @XmlRootElement - public static class StorageFormatDesc { - public RowFormatDesc rowFormat; - public String storedAs; - public StoredByDesc storedBy; - - public StorageFormatDesc() { - } - - public boolean equals(Object o) { - if (this == o) - return true; - if (!(o instanceof StorageFormatDesc)) - return false; - StorageFormatDesc that = (StorageFormatDesc) o; - return xequals(this.rowFormat, that.rowFormat) - && xequals(this.storedAs, that.storedAs) - && xequals(this.storedBy, that.storedBy) - ; - } - } - - /** - * The Row Format. 
- */ - @XmlRootElement - public static class RowFormatDesc { - public String fieldsTerminatedBy; - public String collectionItemsTerminatedBy; - public String mapKeysTerminatedBy; - public String linesTerminatedBy; - public SerdeDesc serde; - - public RowFormatDesc() { - } - - public boolean equals(Object o) { - if (this == o) - return true; - if (!(o instanceof RowFormatDesc)) - return false; - RowFormatDesc that = (RowFormatDesc) o; - return xequals(this.fieldsTerminatedBy, that.fieldsTerminatedBy) - && xequals(this.collectionItemsTerminatedBy, - that.collectionItemsTerminatedBy) - && xequals(this.mapKeysTerminatedBy, that.mapKeysTerminatedBy) - && xequals(this.linesTerminatedBy, that.linesTerminatedBy) - && xequals(this.serde, that.serde) - ; - } - } - - /** - * The SERDE Row Format. - */ - @XmlRootElement - public static class SerdeDesc { - public String name; - public Map properties; - - public SerdeDesc() { - } - - public boolean equals(Object o) { - if (this == o) - return true; - if (!(o instanceof SerdeDesc)) - return false; - SerdeDesc that = (SerdeDesc) o; - return xequals(this.name, that.name) - && xequals(this.properties, that.properties) - ; - } - } - - /** - * How to store the table. 
- */ - @XmlRootElement - public static class StoredByDesc { - public String className; - public Map properties; - - public StoredByDesc() { - } - - public boolean equals(Object o) { - if (this == o) - return true; - if (!(o instanceof StoredByDesc)) - return false; - StoredByDesc that = (StoredByDesc) o; - return xequals(this.className, that.className) - && xequals(this.properties, that.properties) - ; - } - } - -} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/TableLikeDesc.java hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/TableLikeDesc.java deleted file mode 100644 index 1c8e7b9..0000000 --- hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/TableLikeDesc.java +++ /dev/null @@ -1,41 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.templeton; - -import javax.xml.bind.annotation.XmlRootElement; - -/** - * A description of the table to create that's like another table. 
- */ -@XmlRootElement -public class TableLikeDesc extends GroupPermissionsDesc { - public boolean external = false; - public boolean ifNotExists = false; - public String location; - public String existingTable; - public String newTable; - - public TableLikeDesc() { - } - - public String toString() { - return String.format("TableLikeDesc(existingTable=%s, newTable=%s, location=%s", - existingTable, newTable, location); - } -} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/TablePropertyDesc.java hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/TablePropertyDesc.java deleted file mode 100644 index 8a17c9a..0000000 --- hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/TablePropertyDesc.java +++ /dev/null @@ -1,37 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.templeton; - -import javax.xml.bind.annotation.XmlRootElement; - -/** - * A description of a table property. 
- */ -@XmlRootElement -public class TablePropertyDesc extends GroupPermissionsDesc { - public String name; - public String value; - - public TablePropertyDesc() {} - - public String toString() { - return String.format("TablePropertyDesc(name=%s, value=%s)", - name, value); - } -} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/TempletonDelegator.java hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/TempletonDelegator.java deleted file mode 100644 index d2edaca..0000000 --- hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/TempletonDelegator.java +++ /dev/null @@ -1,32 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.templeton; - -/** - * The helper class for all the Templeton delegator classes. A - * delegator will call the underlying Templeton service such as hcat - * or hive. 
- */ -public class TempletonDelegator { - protected AppConfig appConf; - - public TempletonDelegator(AppConfig appConf) { - this.appConf = appConf; - } -} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/UgiFactory.java hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/UgiFactory.java deleted file mode 100644 index d717771..0000000 --- hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/UgiFactory.java +++ /dev/null @@ -1,50 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.hcatalog.templeton; - -import java.io.IOException; -import java.util.concurrent.ConcurrentHashMap; - -import org.apache.hadoop.security.UserGroupInformation; - -public class UgiFactory { - private static ConcurrentHashMap userUgiMap = - new ConcurrentHashMap(); - - public static UserGroupInformation getUgi(String user) throws IOException { - UserGroupInformation ugi = userUgiMap.get(user); - if (ugi == null) { - //create new ugi and add to map - final UserGroupInformation newUgi = - UserGroupInformation.createProxyUser(user, - UserGroupInformation.getLoginUser()); - - //if another thread adds an entry before the check in this one - // the one created here will not be added. - userUgiMap.putIfAbsent(user, newUgi); - - //use the UGI object that got added - return userUgiMap.get(user); - - } - return ugi; - } - - -} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/WadlConfig.java hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/WadlConfig.java deleted file mode 100644 index 03ad328..0000000 --- hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/WadlConfig.java +++ /dev/null @@ -1,41 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.templeton; - -import java.util.List; - -import com.sun.jersey.api.wadl.config.WadlGeneratorConfig; -import com.sun.jersey.api.wadl.config.WadlGeneratorDescription; -import com.sun.jersey.server.wadl.generators.resourcedoc.WadlGeneratorResourceDocSupport; - -/** - * Simple class that incorporates javadoc information into the - * wadl produced by jersey. - * - */ -public class WadlConfig extends WadlGeneratorConfig { - - @Override - public List configure() { - return generator(WadlGeneratorResourceDocSupport.class) - .prop("resourceDocStream", "resourcedoc.xml") - .descriptions(); - } - -} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/tool/HDFSCleanup.java hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/tool/HDFSCleanup.java deleted file mode 100644 index 68830a3..0000000 --- hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/tool/HDFSCleanup.java +++ /dev/null @@ -1,151 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.hcatalog.templeton.tool; - -import java.io.IOException; -import java.util.Date; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hcatalog.templeton.tool.TempletonStorage.Type; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; - -/** - * This does periodic cleanup - */ -public class HDFSCleanup extends Thread { - protected Configuration appConf; - - // The interval to wake up and check the queue - public static final String HDFS_CLEANUP_INTERVAL = - "templeton.hdfs.cleanup.interval"; // 12 hours - - // The max age of a task allowed - public static final String HDFS_CLEANUP_MAX_AGE = - "templeton.hdfs.cleanup.maxage"; // ~ 1 week - - protected static long interval = 1000L * 60L * 60L * 12L; - protected static long maxage = 1000L * 60L * 60L * 24L * 7L; - - // The logger - private static final Log LOG = LogFactory.getLog(HDFSCleanup.class); - - // Handle to cancel loop - private boolean stop = false; - - // The instance - private static HDFSCleanup thisclass = null; - - // Whether the cycle is running - private static boolean isRunning = false; - - // The storage root - private String storage_root; - - /** - * Create a cleanup object. - */ - private HDFSCleanup(Configuration appConf) { - this.appConf = appConf; - interval = appConf.getLong(HDFS_CLEANUP_INTERVAL, interval); - maxage = appConf.getLong(HDFS_CLEANUP_MAX_AGE, maxage); - storage_root = appConf.get(TempletonStorage.STORAGE_ROOT); - } - - public static HDFSCleanup getInstance(Configuration appConf) { - if (thisclass != null) { - return thisclass; - } - thisclass = new HDFSCleanup(appConf); - return thisclass; - } - - public static void startInstance(Configuration appConf) throws IOException { - if (!isRunning) { - getInstance(appConf).start(); - } - } - - /** - * Run the cleanup loop. 
- * - */ - public void run() { - FileSystem fs = null; - while (!stop) { - try { - // Put each check in a separate try/catch, so if that particular - // cycle fails, it'll try again on the next cycle. - try { - if (fs == null) { - fs = FileSystem.get(appConf); - } - checkFiles(fs); - } catch (Exception e) { - LOG.error("Cleanup cycle failed: " + e.getMessage()); - } - - long sleepMillis = (long) (Math.random() * interval); - LOG.info("Next execution: " + new Date(new Date().getTime() - + sleepMillis)); - Thread.sleep(sleepMillis); - - } catch (Exception e) { - // If sleep fails, we should exit now before things get worse. - isRunning = false; - LOG.error("Cleanup failed: " + e.getMessage(), e); - } - } - isRunning = false; - } - - /** - * Loop through all the files, deleting any that are older than - * maxage. - * - * @param fs - * @throws IOException - */ - private void checkFiles(FileSystem fs) throws IOException { - long now = new Date().getTime(); - for (Type type : Type.values()) { - try { - for (FileStatus status : fs.listStatus(new Path( - HDFSStorage.getPath(type, storage_root)))) { - if (now - status.getModificationTime() > maxage) { - LOG.info("Deleting " + status.getPath().toString()); - fs.delete(status.getPath(), true); - } - } - } catch (Exception e) { - // Nothing to find for this type. - } - } - } - - // Handle to stop this process from the outside if needed. - public void exit() { - stop = true; - } - -} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/tool/HDFSStorage.java hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/tool/HDFSStorage.java deleted file mode 100644 index 801546d..0000000 --- hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/tool/HDFSStorage.java +++ /dev/null @@ -1,257 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.templeton.tool; - -import java.io.BufferedReader; -import java.io.Closeable; -import java.io.IOException; -import java.io.InputStreamReader; -import java.io.OutputStreamWriter; -import java.io.PrintWriter; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; - -/** - * HDFS implementation of templeton storage. - * - * This implementation assumes that all keys in key/value pairs are - * chosen such that they don't have any newlines in them. 
- * - */ -public class HDFSStorage implements TempletonStorage { - FileSystem fs = null; - - public String storage_root = null; - - public static final String JOB_PATH = "/jobs"; - public static final String JOB_TRACKINGPATH = "/created"; - public static final String OVERHEAD_PATH = "/overhead"; - - private static final Log LOG = LogFactory.getLog(HDFSStorage.class); - - public void startCleanup(Configuration config) { - try { - HDFSCleanup.startInstance(config); - } catch (Exception e) { - LOG.warn("Cleanup instance didn't start."); - } - } - - @Override - public void saveField(Type type, String id, String key, String val) - throws NotFoundException { - if (val == null) { - return; - } - PrintWriter out = null; - //todo: FileSystem#setPermission() - should this make sure to set 777 on jobs/ ? - Path keyfile= new Path(getPath(type) + "/" + id + "/" + key); - try { - // This will replace the old value if there is one - // Overwrite the existing file - out = new PrintWriter(new OutputStreamWriter(fs.create(keyfile))); - out.write(val); - out.flush(); - } catch (Exception e) { - String errMsg = "Couldn't write to " + keyfile + ": " + e.getMessage(); - LOG.error(errMsg, e); - throw new NotFoundException(errMsg, e); - } finally { - close(out); - } - } - - @Override - public String getField(Type type, String id, String key) { - BufferedReader in = null; - Path p = new Path(getPath(type) + "/" + id + "/" + key); - try { - in = new BufferedReader(new InputStreamReader(fs.open(p))); - String line = null; - String val = ""; - while ((line = in.readLine()) != null) { - if (!val.equals("")) { - val += "\n"; - } - val += line; - } - return val; - } catch (Exception e) { - LOG.info("Couldn't find " + p + ": " + e.getMessage(), e); - } finally { - close(in); - } - return null; - } - - @Override - public Map getFields(Type type, String id) { - HashMap map = new HashMap(); - BufferedReader in = null; - Path p = new Path(getPath(type) + "/" + id); - try { - for (FileStatus status : 
fs.listStatus(p)) { - in = new BufferedReader(new InputStreamReader(fs.open(status.getPath()))); - String line = null; - String val = ""; - while ((line = in.readLine()) != null) { - if (!val.equals("")) { - val += "\n"; - } - val += line; - } - map.put(status.getPath().getName(), val); - } - } catch (IOException e) { - LOG.trace("Couldn't find " + p); - } finally { - close(in); - } - return map; - } - - @Override - public boolean delete(Type type, String id) throws NotFoundException { - Path p = new Path(getPath(type) + "/" + id); - try { - fs.delete(p, true); - } catch (IOException e) { - throw new NotFoundException("Node " + p + " was not found: " + - e.getMessage()); - } - return false; - } - - @Override - public List getAll() { - ArrayList allNodes = new ArrayList(); - for (Type type : Type.values()) { - allNodes.addAll(getAllForType(type)); - } - return allNodes; - } - - @Override - public List getAllForType(Type type) { - ArrayList allNodes = new ArrayList(); - try { - for (FileStatus status : fs.listStatus(new Path(getPath(type)))) { - allNodes.add(status.getPath().getName()); - } - return null; - } catch (Exception e) { - LOG.trace("Couldn't find children for type " + type.toString()); - } - return allNodes; - } - - @Override - public List getAllForKey(String key, String value) { - ArrayList allNodes = new ArrayList(); - try { - for (Type type : Type.values()) { - allNodes.addAll(getAllForTypeAndKey(type, key, value)); - } - } catch (Exception e) { - LOG.trace("Couldn't find children for key " + key + ": " + - e.getMessage()); - } - return allNodes; - } - - @Override - public List getAllForTypeAndKey(Type type, String key, String value) { - ArrayList allNodes = new ArrayList(); - HashMap map = new HashMap(); - try { - for (FileStatus status : - fs.listStatus(new Path(getPath(type)))) { - map = (HashMap) - getFields(type, status.getPath().getName()); - if (map.get(key).equals(value)) { - allNodes.add(status.getPath().getName()); - } - } - } catch (Exception 
e) { - LOG.trace("Couldn't find children for key " + key + ": " + - e.getMessage()); - } - return allNodes; - } - - @Override - public void openStorage(Configuration config) throws IOException { - storage_root = config.get(TempletonStorage.STORAGE_ROOT); - if (fs == null) { - fs = FileSystem.get(config); - } - } - - @Override - public void closeStorage() throws IOException { - // Nothing to do here - } - - /** - * Get the path to storage based on the type. - * @param type - */ - public String getPath(Type type) { - return getPath(type, storage_root); - } - - /** - * Static method to get the path based on the type. - * - * @param type - * @param root - */ - public static String getPath(Type type, String root) { - String typepath = root + OVERHEAD_PATH; - switch (type) { - case JOB: - typepath = root + JOB_PATH; - break; - case JOBTRACKING: - typepath = root + JOB_TRACKINGPATH; - break; - } - return typepath; - } - private void close(Closeable is) { - if(is == null) { - return; - } - try { - is.close(); - } - catch (IOException ex) { - LOG.trace("Failed to close InputStream: " + ex.getMessage()); - } - } -} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/tool/JobState.java hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/tool/JobState.java deleted file mode 100644 index ec121fd..0000000 --- hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/tool/JobState.java +++ /dev/null @@ -1,344 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.templeton.tool; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configuration; - -/** - * The persistent state of a job. The state is stored in one of the - * supported storage systems. - */ -public class JobState { - - private static final Log LOG = LogFactory.getLog(JobState.class); - - private String id; - - // Storage is instantiated in the constructor - private TempletonStorage storage = null; - - private static TempletonStorage.Type type = TempletonStorage.Type.JOB; - - private Configuration config = null; - - public JobState(String id, Configuration conf) - throws IOException { - this.id = id; - config = conf; - storage = getStorage(conf); - } - - public void delete() - throws IOException { - try { - storage.delete(type, id); - } catch (Exception e) { - // Error getting children of node -- probably node has been deleted - LOG.info("Couldn't delete " + id); - } - } - - /** - * Get an instance of the selected storage class. Defaults to - * HDFS storage if none is specified. 
- */ - public static TempletonStorage getStorageInstance(Configuration conf) { - TempletonStorage storage = null; - try { - storage = (TempletonStorage) - Class.forName(conf.get(TempletonStorage.STORAGE_CLASS)) - .newInstance(); - } catch (Exception e) { - LOG.warn("No storage method found: " + e.getMessage()); - try { - storage = new HDFSStorage(); - } catch (Exception ex) { - LOG.error("Couldn't create storage."); - } - } - return storage; - } - - /** - * Get an open instance of the selected storage class. Defaults - * to HDFS storage if none is specified. - */ - public static TempletonStorage getStorage(Configuration conf) throws IOException { - TempletonStorage storage = getStorageInstance(conf); - storage.openStorage(conf); - return storage; - } - - /** - * For storage methods that require a connection, this is a hint - * that it's time to close the connection. - */ - public void close() throws IOException { - storage.closeStorage(); - } - - // - // Properties - // - - /** - * This job id. - */ - public String getId() { - return id; - } - - /** - * The percent complete of a job - */ - public String getPercentComplete() - throws IOException { - return getField("percentComplete"); - } - - public void setPercentComplete(String percent) - throws IOException { - setField("percentComplete", percent); - } - - /** - * The child id of TempletonControllerJob - */ - public String getChildId() - throws IOException { - return getField("childid"); - } - - public void setChildId(String childid) - throws IOException { - setField("childid", childid); - } - - /** - * Add a jobid to the list of children of this job. - * - * @param jobid - * @throws IOException - */ - public void addChild(String jobid) throws IOException { - String jobids = ""; - try { - jobids = getField("children"); - } catch (Exception e) { - // There are none or they're not readable. 
- } - if (!jobids.equals("")) { - jobids += ","; - } - jobids += jobid; - setField("children", jobids); - } - - /** - * Get a list of jobstates for jobs that are children of this job. - * @throws IOException - */ - public List getChildren() throws IOException { - ArrayList children = new ArrayList(); - for (String jobid : getField("children").split(",")) { - children.add(new JobState(jobid, config)); - } - return children; - } - - /** - * Save a comma-separated list of jobids that are children - * of this job. - * @param jobids - * @throws IOException - */ - public void setChildren(String jobids) throws IOException { - setField("children", jobids); - } - - /** - * Set the list of child jobs of this job - * @param children - */ - public void setChildren(List children) throws IOException { - String val = ""; - for (JobState jobstate : children) { - if (!val.equals("")) { - val += ","; - } - val += jobstate.getId(); - } - setField("children", val); - } - - /** - * The system exit value of the job. - */ - public Long getExitValue() - throws IOException { - return getLongField("exitValue"); - } - - public void setExitValue(long exitValue) - throws IOException { - setLongField("exitValue", exitValue); - } - - /** - * When this job was created. - */ - public Long getCreated() - throws IOException { - return getLongField("created"); - } - - public void setCreated(long created) - throws IOException { - setLongField("created", created); - } - - /** - * The user who started this job. - */ - public String getUser() - throws IOException { - return getField("user"); - } - - public void setUser(String user) - throws IOException { - setField("user", user); - } - - /** - * The url callback - */ - public String getCallback() - throws IOException { - return getField("callback"); - } - - public void setCallback(String callback) - throws IOException { - setField("callback", callback); - } - - /** - * The status of a job once it is completed. 
- */ - public String getCompleteStatus() - throws IOException { - return getField("completed"); - } - - public void setCompleteStatus(String complete) - throws IOException { - setField("completed", complete); - } - - /** - * The time when the callback was sent. - */ - public Long getNotifiedTime() - throws IOException { - return getLongField("notified"); - } - - public void setNotifiedTime(long notified) - throws IOException { - setLongField("notified", notified); - } - - // - // Helpers - // - - /** - * Fetch an integer field from the store. - */ - public Long getLongField(String name) - throws IOException { - String s = storage.getField(type, id, name); - if (s == null) - return null; - else { - try { - return new Long(s); - } catch (NumberFormatException e) { - LOG.error("templeton: bug " + name + " " + s + " : " + e); - return null; - } - } - } - - /** - * Store a String field from the store. - */ - public void setField(String name, String val) - throws IOException { - try { - storage.saveField(type, id, name, val); - } catch (NotFoundException ne) { - throw new IOException(ne.getMessage()); - } - } - - public String getField(String name) - throws IOException { - return storage.getField(type, id, name); - } - - /** - * Store a long field. - * - * @param name - * @param val - * @throws IOException - */ - public void setLongField(String name, long val) - throws IOException { - try { - storage.saveField(type, id, name, String.valueOf(val)); - } catch (NotFoundException ne) { - throw new IOException("Job " + id + " was not found: " + - ne.getMessage()); - } - } - - /** - * Get an id for each currently existing job, which can be used to create - * a JobState object. 
- * - * @param conf - * @throws IOException - */ - public static List getJobs(Configuration conf) throws IOException { - try { - return getStorage(conf).getAllForType(type); - } catch (Exception e) { - throw new IOException("Can't get jobs", e); - } - } -} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/tool/JobStateTracker.java hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/tool/JobStateTracker.java deleted file mode 100644 index 1e3dc4d..0000000 --- hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/tool/JobStateTracker.java +++ /dev/null @@ -1,146 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.hcatalog.templeton.tool; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.zookeeper.CreateMode; -import org.apache.zookeeper.KeeperException; -import org.apache.zookeeper.ZooKeeper; -import org.apache.zookeeper.ZooDefs.Ids; -import org.apache.zookeeper.data.Stat; - -public class JobStateTracker { - // The path to the tracking root - private String job_trackingroot = null; - - // The zookeeper connection to use - private ZooKeeper zk; - - // The id of the tracking node -- must be a SEQUENTIAL node - private String trackingnode; - - // The id of the job this tracking node represents - private String jobid; - - // The logger - private static final Log LOG = LogFactory.getLog(JobStateTracker.class); - - /** - * Constructor for a new node -- takes the jobid of an existing job - * - */ - public JobStateTracker(String node, ZooKeeper zk, boolean nodeIsTracker, - String job_trackingpath) { - this.zk = zk; - if (nodeIsTracker) { - trackingnode = node; - } else { - jobid = node; - } - job_trackingroot = job_trackingpath; - } - - /** - * Create the parent znode for this job state. 
- */ - public void create() - throws IOException { - String[] paths = ZooKeeperStorage.getPaths(job_trackingroot); - for (String znode : paths) { - try { - zk.create(znode, new byte[0], - Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); - } catch (KeeperException.NodeExistsException e) { - } catch (Exception e) { - throw new IOException("Unable to create parent nodes"); - } - } - try { - trackingnode = zk.create(makeTrackingZnode(), jobid.getBytes(), - Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT_SEQUENTIAL); - } catch (Exception e) { - throw new IOException("Unable to create " + makeTrackingZnode()); - } - } - - public void delete() - throws IOException { - try { - zk.delete(makeTrackingJobZnode(trackingnode), -1); - } catch (Exception e) { - // Might have been deleted already - LOG.info("Couldn't delete " + makeTrackingJobZnode(trackingnode)); - } - } - - /** - * Get the jobid for this tracking node - * @throws IOException - */ - public String getJobID() throws IOException { - try { - return new String(zk.getData(makeTrackingJobZnode(trackingnode), - false, new Stat())); - } catch (KeeperException e) { - // It was deleted during the transaction - throw new IOException("Node already deleted " + trackingnode); - } catch (InterruptedException e) { - throw new IOException("Couldn't read node " + trackingnode); - } - } - - /** - * Make a ZK path to a new tracking node - */ - public String makeTrackingZnode() { - return job_trackingroot + "/"; - } - - /** - * Make a ZK path to an existing tracking node - */ - public String makeTrackingJobZnode(String nodename) { - return job_trackingroot + "/" + nodename; - } - - /* - * Get the list of tracking jobs. These can be used to determine which jobs have - * expired. 
- */ - public static List getTrackingJobs(Configuration conf, ZooKeeper zk) - throws IOException { - ArrayList jobs = new ArrayList(); - try { - for (String myid : zk.getChildren( - conf.get(TempletonStorage.STORAGE_ROOT) - + ZooKeeperStorage.TRACKINGDIR, false)) { - jobs.add(myid); - } - } catch (Exception e) { - throw new IOException("Can't get tracking children", e); - } - return jobs; - } -} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/tool/NotFoundException.java hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/tool/NotFoundException.java deleted file mode 100644 index d49f05a..0000000 --- hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/tool/NotFoundException.java +++ /dev/null @@ -1,33 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.templeton.tool; - -/** - * Simple not found exception. 
- */ -public class NotFoundException extends Exception { - private static final long serialVersionUID = 1L; - - public NotFoundException(String msg) { - super(msg); - } - public NotFoundException(String msg, Throwable rootCause) { - super(msg, rootCause); - } -} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/tool/NullRecordReader.java hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/tool/NullRecordReader.java deleted file mode 100644 index 558f5cf..0000000 --- hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/tool/NullRecordReader.java +++ /dev/null @@ -1,62 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.templeton.tool; - -import java.io.IOException; - -import org.apache.hadoop.io.NullWritable; -import org.apache.hadoop.mapreduce.InputSplit; -import org.apache.hadoop.mapreduce.RecordReader; -import org.apache.hadoop.mapreduce.TaskAttemptContext; - -/** - * An empty record reader. 
- */ -public class NullRecordReader - extends RecordReader { - @Override - public void initialize(InputSplit genericSplit, TaskAttemptContext context) - throws IOException { - } - - @Override - public void close() throws IOException { - } - - @Override - public NullWritable getCurrentKey() { - return NullWritable.get(); - } - - @Override - public NullWritable getCurrentValue() { - return NullWritable.get(); - } - - @Override - public float getProgress() { - return 1.0f; - } - - @Override - public boolean nextKeyValue() throws IOException { - return false; - } -} - diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/tool/NullSplit.java hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/tool/NullSplit.java deleted file mode 100644 index 8875341..0000000 --- hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/tool/NullSplit.java +++ /dev/null @@ -1,43 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.templeton.tool; - -import java.io.DataInput; -import java.io.DataOutput; -import java.io.IOException; -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.mapreduce.InputSplit; - -/** - * An empty splitter. 
- */ -public class NullSplit extends InputSplit implements Writable { - public long getLength() { return 0; } - - public String[] getLocations() throws IOException { - return new String[]{}; - } - - @Override - public void write(DataOutput out) throws IOException {} - - @Override - public void readFields(DataInput in) throws IOException {} -} - diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/tool/SingleInputFormat.java hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/tool/SingleInputFormat.java deleted file mode 100644 index 2769343..0000000 --- hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/tool/SingleInputFormat.java +++ /dev/null @@ -1,51 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.templeton.tool; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - -import org.apache.hadoop.io.NullWritable; -import org.apache.hadoop.mapreduce.InputFormat; -import org.apache.hadoop.mapreduce.InputSplit; -import org.apache.hadoop.mapreduce.JobContext; -import org.apache.hadoop.mapreduce.RecordReader; -import org.apache.hadoop.mapreduce.TaskAttemptContext; - -/** - * An empty InputFormat. 
- */ -public class SingleInputFormat - extends InputFormat { - public List getSplits(JobContext job) - throws IOException { - List res = new ArrayList(); - res.add(new NullSplit()); - return res; - } - - public RecordReader - createRecordReader(InputSplit split, - TaskAttemptContext context) - throws IOException { - return new NullRecordReader(); - } -} - diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/tool/TempletonControllerJob.java hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/tool/TempletonControllerJob.java deleted file mode 100644 index 3268e34..0000000 --- hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/tool/TempletonControllerJob.java +++ /dev/null @@ -1,351 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.hcatalog.templeton.tool; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.OutputStream; -import java.io.PrintWriter; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Iterator; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.TimeUnit; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.conf.Configured; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.NullWritable; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.mapred.JobClient; -import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.JobID; -import org.apache.hadoop.mapreduce.Mapper; -import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat; -import org.apache.hadoop.mapreduce.security.token.delegation.DelegationTokenIdentifier; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hadoop.security.token.Token; -import org.apache.hadoop.util.Tool; -import org.apache.hadoop.util.ToolRunner; - -/** - * A Map Reduce job that will start another job. - * - * We have a single Mapper job that starts a child MR job. The parent - * monitors the child child job and ends when the child job exits. In - * addition, we - * - * - write out the parent job id so the caller can record it. - * - run a keep alive thread so the job doesn't end. - * - Optionally, store the stdout, stderr, and exit value of the child - * in hdfs files. 
- */ -public class TempletonControllerJob extends Configured implements Tool { - public static final String COPY_NAME = "templeton.copy"; - public static final String STATUSDIR_NAME = "templeton.statusdir"; - public static final String JAR_ARGS_NAME = "templeton.args"; - public static final String OVERRIDE_CLASSPATH = "templeton.override-classpath"; - - public static final String STDOUT_FNAME = "stdout"; - public static final String STDERR_FNAME = "stderr"; - public static final String EXIT_FNAME = "exit"; - - public static final int WATCHER_TIMEOUT_SECS = 10; - public static final int KEEP_ALIVE_MSEC = 60 * 1000; - - public static final String TOKEN_FILE_ARG_PLACEHOLDER - = "__WEBHCAT_TOKEN_FILE_LOCATION__"; - - - private static TrivialExecService execService = TrivialExecService.getInstance(); - - private static final Log LOG = LogFactory.getLog(TempletonControllerJob.class); - - - public static class LaunchMapper - extends Mapper { - protected Process startJob(Context context, String user, - String overrideClasspath) - throws IOException, InterruptedException { - Configuration conf = context.getConfiguration(); - copyLocal(COPY_NAME, conf); - String[] jarArgs - = TempletonUtils.decodeArray(conf.get(JAR_ARGS_NAME)); - - ArrayList removeEnv = new ArrayList(); - removeEnv.add("HADOOP_ROOT_LOGGER"); - Map env = TempletonUtils.hadoopUserEnv(user, - overrideClasspath); - List jarArgsList = new LinkedList(Arrays.asList(jarArgs)); - String tokenFile = System.getenv("HADOOP_TOKEN_FILE_LOCATION"); - - - if (tokenFile != null) { - //Token is available, so replace the placeholder - String tokenArg = "mapreduce.job.credentials.binary=" + tokenFile; - for(int i=0; i it = jarArgsList.iterator(); - while(it.hasNext()){ - String arg = it.next(); - if(arg.contains(TOKEN_FILE_ARG_PLACEHOLDER)){ - it.remove(); - } - } - } - return execService.run(jarArgsList, removeEnv, env); - } - - private void copyLocal(String var, Configuration conf) - throws IOException { - String[] filenames 
= TempletonUtils.decodeArray(conf.get(var)); - if (filenames != null) { - for (String filename : filenames) { - Path src = new Path(filename); - Path dst = new Path(src.getName()); - FileSystem fs = src.getFileSystem(conf); - System.err.println("templeton: copy " + src + " => " + dst); - fs.copyToLocalFile(src, dst); - } - } - } - - @Override - public void run(Context context) - throws IOException, InterruptedException { - - Configuration conf = context.getConfiguration(); - - Process proc = startJob(context, - conf.get("user.name"), - conf.get(OVERRIDE_CLASSPATH)); - - String statusdir = conf.get(STATUSDIR_NAME); - - if (statusdir != null) { - statusdir = TempletonUtils.addUserHomeDirectoryIfApplicable(statusdir, conf.get("user.name"), conf); - } - - ExecutorService pool = Executors.newCachedThreadPool(); - executeWatcher(pool, conf, context.getJobID(), - proc.getInputStream(), statusdir, STDOUT_FNAME); - executeWatcher(pool, conf, context.getJobID(), - proc.getErrorStream(), statusdir, STDERR_FNAME); - KeepAlive keepAlive = startCounterKeepAlive(pool, context); - - proc.waitFor(); - keepAlive.sendReport = false; - pool.shutdown(); - if (!pool.awaitTermination(WATCHER_TIMEOUT_SECS, TimeUnit.SECONDS)) - pool.shutdownNow(); - - writeExitValue(conf, proc.exitValue(), statusdir); - JobState state = new JobState(context.getJobID().toString(), conf); - state.setExitValue(proc.exitValue()); - state.setCompleteStatus("done"); - state.close(); - - if (proc.exitValue() != 0) - System.err.println("templeton: job failed with exit code " - + proc.exitValue()); - else - System.err.println("templeton: job completed with exit code 0"); - } - - private void executeWatcher(ExecutorService pool, Configuration conf, - JobID jobid, InputStream in, String statusdir, - String name) - throws IOException { - Watcher w = new Watcher(conf, jobid, in, statusdir, name); - pool.execute(w); - } - - private KeepAlive startCounterKeepAlive(ExecutorService pool, Context cnt) - throws IOException { 
- KeepAlive k = new KeepAlive(cnt); - pool.execute(k); - return k; - } - - private void writeExitValue(Configuration conf, int exitValue, String statusdir) - throws IOException { - if (TempletonUtils.isset(statusdir)) { - Path p = new Path(statusdir, EXIT_FNAME); - FileSystem fs = p.getFileSystem(conf); - OutputStream out = fs.create(p); - System.err.println("templeton: Writing exit value " - + exitValue + " to " + p); - PrintWriter writer = new PrintWriter(out); - writer.println(exitValue); - writer.close(); - } - } - } - - private static class Watcher implements Runnable { - private InputStream in; - private OutputStream out; - private JobID jobid; - private Configuration conf; - - public Watcher(Configuration conf, JobID jobid, InputStream in, - String statusdir, String name) - throws IOException { - this.conf = conf; - this.jobid = jobid; - this.in = in; - - if (name.equals(STDERR_FNAME)) - out = System.err; - else - out = System.out; - - if (TempletonUtils.isset(statusdir)) { - Path p = new Path(statusdir, name); - FileSystem fs = p.getFileSystem(conf); - out = fs.create(p); - System.err.println("templeton: Writing status to " + p); - } - } - - @Override - public void run() { - try { - InputStreamReader isr = new InputStreamReader(in); - BufferedReader reader = new BufferedReader(isr); - PrintWriter writer = new PrintWriter(out); - - String line; - while ((line = reader.readLine()) != null) { - writer.println(line); - JobState state = null; - try { - String percent = TempletonUtils.extractPercentComplete(line); - String childid = TempletonUtils.extractChildJobId(line); - - if (percent != null || childid != null) { - state = new JobState(jobid.toString(), conf); - state.setPercentComplete(percent); - state.setChildId(childid); - } - } catch (IOException e) { - System.err.println("templeton: state error: " + e); - } finally { - if (state != null) { - try { - state.close(); - } catch (IOException e) { - } - } - } - } - writer.flush(); - } catch (IOException e) { 
- System.err.println("templeton: execute error: " + e); - } - } - } - - private static class KeepAlive implements Runnable { - private final Mapper.Context cnt; - private volatile boolean sendReport; - - public KeepAlive(Mapper.Context cnt) { - this.cnt = cnt; - this.sendReport = true; - } - - @Override - public void run() { - try { - while (sendReport) { - cnt.progress(); - Thread.sleep(KEEP_ALIVE_MSEC); - } - } catch (InterruptedException e) { - // Ok to be interrupted - } - } - } - - private JobID submittedJobId; - - public String getSubmittedId() { - if (submittedJobId == null) - return null; - else - return submittedJobId.toString(); - } - - /** - * Enqueue the job and print out the job id for later collection. - */ - @Override - public int run(String[] args) - throws IOException, InterruptedException, ClassNotFoundException { - Configuration conf = getConf(); - conf.set(JAR_ARGS_NAME, TempletonUtils.encodeArray(args)); - conf.set("user.name", UserGroupInformation.getCurrentUser().getShortUserName()); - Job job = new Job(conf); - job.setJarByClass(TempletonControllerJob.class); - job.setJobName("TempletonControllerJob"); - job.setMapperClass(LaunchMapper.class); - job.setMapOutputKeyClass(Text.class); - job.setMapOutputValueClass(Text.class); - job.setInputFormatClass(SingleInputFormat.class); - NullOutputFormat of - = new NullOutputFormat(); - job.setOutputFormatClass(of.getClass()); - job.setNumReduceTasks(0); - - JobClient jc = new JobClient(new JobConf(job.getConfiguration())); - - Token mrdt = jc.getDelegationToken(new Text("mr token")); - job.getCredentials().addToken(new Text("mr token"), mrdt); - job.submit(); - - submittedJobId = job.getJobID(); - - return 0; - } - - - public static void main(String[] args) throws Exception { - int ret = ToolRunner.run(new TempletonControllerJob(), args); - if (ret != 0) - System.err.println("TempletonControllerJob failed!"); - System.exit(ret); - } -} diff --git 
hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/tool/TempletonStorage.java hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/tool/TempletonStorage.java deleted file mode 100644 index 48164c8..0000000 --- hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/tool/TempletonStorage.java +++ /dev/null @@ -1,153 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.templeton.tool; - -import java.io.IOException; -import java.util.List; -import java.util.Map; - -import org.apache.hadoop.conf.Configuration; - -/** - * An interface to handle different Templeton storage methods, including - * ZooKeeper and HDFS. Any storage scheme must be able to handle being - * run in an HDFS environment, where specific file systems and virtual - * machines may not be available. - * - * Storage is done individually in a hierarchy: type (the data type, - * as listed below), then the id (a given jobid, jobtrackingid, etc.), - * then the key/value pairs. So an entry might look like: - * - * JOB - * jobid00035 - * user -> rachel - * datecreated -> 2/5/12 - * etc. - * - * Each field must be available to be fetched/changed individually. 
- */ -public interface TempletonStorage { - // These are the possible types referenced by 'type' below. - public enum Type { - UNKNOWN, JOB, JOBTRACKING, TEMPLETONOVERHEAD - } - - public static final String STORAGE_CLASS = "templeton.storage.class"; - public static final String STORAGE_ROOT = "templeton.storage.root"; - - /** - * Start the cleanup process for this storage type. - * @param config - */ - public void startCleanup(Configuration config); - - /** - * Save a single key/value pair for a specific job id. - * @param type The data type (as listed above) - * @param id The String id of this data grouping (jobid, etc.) - * @param key The name of the field to save - * @param val The value of the field to save - */ - public void saveField(Type type, String id, String key, String val) - throws NotFoundException; - - /** - * Get the value of one field for a given data type. If the type - * is UNKNOWN, search for the id in all types. - * @param type The data type (as listed above) - * @param id The String id of this data grouping (jobid, etc.) - * @param key The name of the field to retrieve - * @return The value of the field requested, or null if not - * found. - */ - public String getField(Type type, String id, String key); - - /** - * Get all the name/value pairs stored for this id. - * Be careful using getFields() -- optimistic locking will mean that - * your odds of a conflict are decreased if you read/write one field - * at a time. getFields() is intended for read-only usage. - * - * If the type is UNKNOWN, search for the id in all types. - * - * @param type The data type (as listed above) - * @param id The String id of this data grouping (jobid, etc.) - * @return A Map of key/value pairs found for this type/id. - */ - public Map getFields(Type type, String id); - - /** - * Delete a data grouping (all data for a jobid, all tracking data - * for a job, etc.). If the type is UNKNOWN, search for the id - * in all types. 
- * - * @param type The data type (as listed above) - * @param id The String id of this data grouping (jobid, etc.) - * @return True if successful, false if not, throws NotFoundException - * if the id wasn't found. - */ - public boolean delete(Type type, String id) throws NotFoundException; - - /** - * Get the id of each data grouping in the storage system. - * - * @return An ArrayList of ids. - */ - public List getAll(); - - /** - * Get the id of each data grouping of a given type in the storage - * system. - * @param type The data type (as listed above) - * @return An ArrayList of ids. - */ - public List getAllForType(Type type); - - /** - * Get the id of each data grouping that has the specific key/value - * pair. - * @param key The name of the field to search for - * @param value The value of the field to search for - * @return An ArrayList of ids. - */ - public List getAllForKey(String key, String value); - - /** - * Get the id of each data grouping of a given type that has the - * specific key/value pair. - * @param type The data type (as listed above) - * @param key The name of the field to search for - * @param value The value of the field to search for - * @return An ArrayList of ids. - */ - public List getAllForTypeAndKey(Type type, String key, - String value); - - /** - * For storage methods that require a connection, this is a hint - * that it's time to open a connection. - */ - public void openStorage(Configuration config) throws IOException; - - /** - * For storage methods that require a connection, this is a hint - * that it's time to close the connection. 
- */ - public void closeStorage() throws IOException; -} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/tool/TempletonUtils.java hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/tool/TempletonUtils.java deleted file mode 100644 index 1caf61a..0000000 --- hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/tool/TempletonUtils.java +++ /dev/null @@ -1,302 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.hcatalog.templeton.tool; - -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.InputStream; -import java.net.URI; -import java.net.URISyntaxException; -import java.net.URL; -import java.net.URLConnection; -import java.security.PrivilegedExceptionAction; -import java.util.Collection; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.LocalFileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hdfs.DistributedFileSystem; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hadoop.util.StringUtils; -import org.apache.hcatalog.templeton.UgiFactory; - -/** - * General utility methods. - */ -public class TempletonUtils { - /** - * Is the object non-empty? - */ - public static boolean isset(String s) { - return (s != null) && (s.length() > 0); - } - - /** - * Is the object non-empty? - */ - public static boolean isset(char ch) { - return (ch != 0); - } - - /** - * Is the object non-empty? - */ - public static boolean isset(T[] a) { - return (a != null) && (a.length > 0); - } - - - /** - * Is the object non-empty? - */ - public static boolean isset(Collection col) { - return (col != null) && (!col.isEmpty()); - } - - /** - * Is the object non-empty? - */ - public static boolean isset(Map col) { - return (col != null) && (!col.isEmpty()); - } - - - public static final Pattern JAR_COMPLETE - = Pattern.compile(" map \\d+%\\s+reduce \\d+%$"); - public static final Pattern PIG_COMPLETE = Pattern.compile(" \\d+% complete$"); - - /** - * Extract the percent complete line from Pig or Jar jobs. 
- */ - public static String extractPercentComplete(String line) { - Matcher jar = JAR_COMPLETE.matcher(line); - if (jar.find()) - return jar.group().trim(); - - Matcher pig = PIG_COMPLETE.matcher(line); - if (pig.find()) - return pig.group().trim(); - - return null; - } - - public static final Pattern JAR_ID = Pattern.compile(" Running job: (\\S+)$"); - public static final Pattern PIG_ID = Pattern.compile(" HadoopJobId: (\\S+)$"); - public static final Pattern[] ID_PATTERNS = {JAR_ID, PIG_ID}; - - /** - * Extract the job id from jar jobs. - */ - public static String extractChildJobId(String line) { - for (Pattern p : ID_PATTERNS) { - Matcher m = p.matcher(line); - if (m.find()) - return m.group(1); - } - - return null; - } - - /** - * Take an array of strings and encode it into one string. - */ - public static String encodeArray(String[] plain) { - if (plain == null) - return null; - - String[] escaped = new String[plain.length]; - - for (int i = 0; i < plain.length; ++i) { - if (plain[i] == null) { - plain[i] = ""; - } - escaped[i] = StringUtils.escapeString(plain[i]); - } - - return StringUtils.arrayToString(escaped); - } - - /** - * Encode a List into a string. - */ - public static String encodeArray(List list) { - if (list == null) - return null; - String[] array = new String[list.size()]; - return encodeArray(list.toArray(array)); - } - - /** - * Take an encode strings and decode it into an array of strings. 
- */ - public static String[] decodeArray(String s) { - if (s == null) - return null; - - String[] escaped = StringUtils.split(s); - String[] plain = new String[escaped.length]; - - for (int i = 0; i < escaped.length; ++i) - plain[i] = StringUtils.unEscapeString(escaped[i]); - - return plain; - } - - public static String[] hadoopFsListAsArray(String files, Configuration conf, - String user) - throws URISyntaxException, FileNotFoundException, IOException, - InterruptedException { - if (files == null || conf == null) { - return null; - } - String[] dirty = files.split(","); - String[] clean = new String[dirty.length]; - - for (int i = 0; i < dirty.length; ++i) - clean[i] = hadoopFsFilename(dirty[i], conf, user); - - return clean; - } - - public static String hadoopFsListAsString(String files, Configuration conf, - String user) - throws URISyntaxException, FileNotFoundException, IOException, - InterruptedException { - if (files == null || conf == null) { - return null; - } - return StringUtils.arrayToString(hadoopFsListAsArray(files, conf, user)); - } - - public static String hadoopFsFilename(String fname, Configuration conf, String user) - throws URISyntaxException, FileNotFoundException, IOException, - InterruptedException { - Path p = hadoopFsPath(fname, conf, user); - if (p == null) - return null; - else - return p.toString(); - } - - /** - * @return true iff we are sure the file is not there. - */ - public static boolean hadoopFsIsMissing(FileSystem fs, Path p) { - try { - return !fs.exists(p); - } catch (Throwable t) { - // Got an error, might be there anyway due to a - // permissions problem. - return false; - } - } - - public static String addUserHomeDirectoryIfApplicable(String origPathStr, String user, Configuration conf) throws IOException { - Path path = new Path(origPathStr); - String result = origPathStr; - - // shortcut for s3/asv - // If path contains scheme, user should mean an absolute path, - // However, path.isAbsolute tell us otherwise. 
- // So we skip conversion for non-hdfs. - if (!(path.getFileSystem(conf) instanceof DistributedFileSystem)&& - !(path.getFileSystem(conf) instanceof LocalFileSystem)) { - return result; - } - if (!path.isAbsolute()) { - result = "/user/" + user + "/" + origPathStr; - } - return result; - } - - public static Path hadoopFsPath(String fname, final Configuration conf, String user) - throws URISyntaxException, IOException, - InterruptedException { - if (fname == null || conf == null) { - return null; - } - - UserGroupInformation ugi; - if (user!=null) { - ugi = UgiFactory.getUgi(user); - } else { - ugi = UserGroupInformation.getLoginUser(); - } - final String finalFName = new String(fname); - - final FileSystem defaultFs = - ugi.doAs(new PrivilegedExceptionAction() { - public FileSystem run() - throws URISyntaxException, IOException, InterruptedException { - return FileSystem.get(new URI(finalFName), conf); - } - }); - - fname = addUserHomeDirectoryIfApplicable(fname, user, conf); - URI u = new URI(fname); - Path p = new Path(u).makeQualified(defaultFs); - - if (hadoopFsIsMissing(defaultFs, p)) - throw new FileNotFoundException("File " + fname + " does not exist."); - - return p; - } - - /** - * GET the given url. Returns the number of bytes received. - */ - public static int fetchUrl(URL url) - throws IOException { - URLConnection cnx = url.openConnection(); - InputStream in = cnx.getInputStream(); - - byte[] buf = new byte[8192]; - int total = 0; - int len = 0; - while ((len = in.read(buf)) >= 0) - total += len; - - return total; - } - - /** - * Set the environment variables to specify the hadoop user. 
- */ - public static Map hadoopUserEnv(String user, - String overrideClasspath) { - HashMap env = new HashMap(); - env.put("HADOOP_USER_NAME", user); - - if (overrideClasspath != null) { - env.put("HADOOP_USER_CLASSPATH_FIRST", "true"); - String cur = System.getenv("HADOOP_CLASSPATH"); - if (TempletonUtils.isset(cur)) - overrideClasspath = overrideClasspath + ":" + cur; - env.put("HADOOP_CLASSPATH", overrideClasspath); - } - - return env; - } -} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/tool/TrivialExecService.java hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/tool/TrivialExecService.java deleted file mode 100644 index b54f56f..0000000 --- hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/tool/TrivialExecService.java +++ /dev/null @@ -1,56 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.templeton.tool; - -import java.io.IOException; -import java.util.List; -import java.util.Map; - -/** - * Execute a local program. This is a singleton service that will - * execute a programs on the local box. 
- */ -public class TrivialExecService { - private static volatile TrivialExecService theSingleton; - - /** - * Retrieve the singleton. - */ - public static synchronized TrivialExecService getInstance() { - if (theSingleton == null) - theSingleton = new TrivialExecService(); - return theSingleton; - } - - public Process run(List cmd, List removeEnv, - Map environmentVariables) - throws IOException { - System.err.println("templeton: starting " + cmd); - System.err.print("With environment variables: "); - for (Map.Entry keyVal : environmentVariables.entrySet()) { - System.err.println(keyVal.getKey() + "=" + keyVal.getValue()); - } - ProcessBuilder pb = new ProcessBuilder(cmd); - for (String key : removeEnv) - pb.environment().remove(key); - pb.environment().putAll(environmentVariables); - return pb.start(); - } - -} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/tool/ZooKeeperCleanup.java hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/tool/ZooKeeperCleanup.java deleted file mode 100644 index 60735bf..0000000 --- hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/tool/ZooKeeperCleanup.java +++ /dev/null @@ -1,199 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.templeton.tool; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.Date; - -import org.apache.hadoop.conf.Configuration; -import org.apache.zookeeper.ZooKeeper; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; - -/** - * This does periodic cleanup - */ -public class ZooKeeperCleanup extends Thread { - protected Configuration appConf; - - // The interval to wake up and check the queue - public static final String ZK_CLEANUP_INTERVAL = - "templeton.zookeeper.cleanup.interval"; // 12 hours - - // The max age of a task allowed - public static final String ZK_CLEANUP_MAX_AGE = - "templeton.zookeeper.cleanup.maxage"; // ~ 1 week - - protected static long interval = 1000L * 60L * 60L * 12L; - protected static long maxage = 1000L * 60L * 60L * 24L * 7L; - - // The logger - private static final Log LOG = LogFactory.getLog(ZooKeeperCleanup.class); - - // Handle to cancel loop - private boolean stop = false; - - // The instance - private static ZooKeeperCleanup thisclass = null; - - // Whether the cycle is running - private static boolean isRunning = false; - - /** - * Create a cleanup object. We use the appConfig to configure JobState. - * @param appConf - */ - private ZooKeeperCleanup(Configuration appConf) { - this.appConf = appConf; - interval = appConf.getLong(ZK_CLEANUP_INTERVAL, interval); - maxage = appConf.getLong(ZK_CLEANUP_MAX_AGE, maxage); - } - - public static ZooKeeperCleanup getInstance(Configuration appConf) { - if (thisclass != null) { - return thisclass; - } - thisclass = new ZooKeeperCleanup(appConf); - return thisclass; - } - - public static void startInstance(Configuration appConf) throws IOException { - if (!isRunning) { - getInstance(appConf).start(); - } - } - - /** - * Run the cleanup loop. 
- * - * @throws IOException - */ - public void run() { - ZooKeeper zk = null; - List nodes = null; - isRunning = true; - while (!stop) { - try { - // Put each check in a separate try/catch, so if that particular - // cycle fails, it'll try again on the next cycle. - try { - zk = ZooKeeperStorage.zkOpen(appConf); - - nodes = getChildList(zk); - - for (String node : nodes) { - boolean deleted = checkAndDelete(node, zk); - if (!deleted) { - break; - } - } - - zk.close(); - } catch (Exception e) { - LOG.error("Cleanup cycle failed: " + e.getMessage()); - } finally { - if (zk != null) { - try { - zk.close(); - } catch (InterruptedException e) { - // We're trying to exit anyway, just ignore. - } - } - } - - long sleepMillis = (long) (Math.random() * interval); - LOG.info("Next execution: " + new Date(new Date().getTime() - + sleepMillis)); - Thread.sleep(sleepMillis); - - } catch (Exception e) { - // If sleep fails, we should exit now before things get worse. - isRunning = false; - LOG.error("Cleanup failed: " + e.getMessage(), e); - } - } - isRunning = false; - } - - /** - * Get the list of jobs from JobState - * - * @throws IOException - */ - public List getChildList(ZooKeeper zk) { - try { - List jobs = JobStateTracker.getTrackingJobs(appConf, zk); - Collections.sort(jobs); - return jobs; - } catch (IOException e) { - LOG.info("No jobs to check."); - } - return new ArrayList(); - } - - /** - * Check to see if a job is more than maxage old, and delete it if so. - */ - public boolean checkAndDelete(String node, ZooKeeper zk) { - JobState state = null; - try { - JobStateTracker tracker = new JobStateTracker(node, zk, true, - appConf.get(TempletonStorage.STORAGE_ROOT + - ZooKeeperStorage.TRACKINGDIR)); - long now = new Date().getTime(); - state = new JobState(tracker.getJobID(), appConf); - - // Set the default to 0 -- if the created date is null, there was - // an error in creation, and we want to delete it anyway. 
- long then = 0; - if (state.getCreated() != null) { - then = state.getCreated(); - } - if (now - then > maxage) { - LOG.info("Deleting " + tracker.getJobID()); - state.delete(); - tracker.delete(); - return true; - } - return false; - } catch (Exception e) { - LOG.info("checkAndDelete failed for " + node); - // We don't throw a new exception for this -- just keep going with the - // next one. - return true; - } finally { - if (state != null) { - try { - state.close(); - } catch (IOException e) { - LOG.info("Couldn't close job state."); - } - } - } - } - - // Handle to stop this process from the outside if needed. - public void exit() { - stop = true; - } -} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/tool/ZooKeeperStorage.java hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/tool/ZooKeeperStorage.java deleted file mode 100644 index 9d92a25..0000000 --- hcatalog/webhcat/svr/src/main/java/org/apache/hcatalog/templeton/tool/ZooKeeperStorage.java +++ /dev/null @@ -1,372 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.hcatalog.templeton.tool; - -import java.io.IOException; -import java.io.UnsupportedEncodingException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.zookeeper.CreateMode; -import org.apache.zookeeper.KeeperException; -import org.apache.zookeeper.WatchedEvent; -import org.apache.zookeeper.Watcher; -import org.apache.zookeeper.ZooDefs.Ids; -import org.apache.zookeeper.ZooKeeper; - -/** - * A storage implementation based on storing everything in ZooKeeper. - * This keeps everything in a central location that is guaranteed - * to be available and accessible. - * - * Data is stored with each key/value pair being a node in ZooKeeper. - */ -public class ZooKeeperStorage implements TempletonStorage { - - public static final String TRACKINGDIR = "/created"; - - // Locations for each of the storage types - public String storage_root = null; - public String job_path = null; - public String job_trackingpath = null; - public String overhead_path = null; - - public static final String ZK_HOSTS = "templeton.zookeeper.hosts"; - public static final String ZK_SESSION_TIMEOUT - = "templeton.zookeeper.session-timeout"; - - public static final String ENCODING = "UTF-8"; - - private static final Log LOG = LogFactory.getLog(ZooKeeperStorage.class); - - private ZooKeeper zk; - - /** - * Open a ZooKeeper connection for the JobState. - */ - public static ZooKeeper zkOpen(String zkHosts, int zkSessionTimeout) - throws IOException { - return new ZooKeeper(zkHosts, - zkSessionTimeout, - new Watcher() { - @Override - synchronized public void process(WatchedEvent event) { - } - }); - } - - /** - * Open a ZooKeeper connection for the JobState. 
- */ - public static ZooKeeper zkOpen(Configuration conf) - throws IOException { - return zkOpen(conf.get(ZK_HOSTS), - conf.getInt(ZK_SESSION_TIMEOUT, 30000)); - } - - public ZooKeeperStorage() { - // No-op -- this is needed to be able to instantiate the - // class from the name. - } - - /** - * Close this ZK connection. - */ - public void close() - throws IOException { - if (zk != null) { - try { - zk.close(); - zk = null; - } catch (InterruptedException e) { - throw new IOException("Closing ZooKeeper connection", e); - } - } - } - - public void startCleanup(Configuration config) { - try { - ZooKeeperCleanup.startInstance(config); - } catch (Exception e) { - LOG.warn("Cleanup instance didn't start."); - } - } - - /** - * Create a node in ZooKeeper - */ - public void create(Type type, String id) - throws IOException { - try { - String[] paths = getPaths(makeZnode(type, id)); - boolean wasCreated = false; - for (String znode : paths) { - try { - zk.create(znode, new byte[0], - Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); - wasCreated = true; - } catch (KeeperException.NodeExistsException e) { - } - } - if (wasCreated) { - try { - // Really not sure if this should go here. Will have - // to see how the storage mechanism evolves. - if (type.equals(Type.JOB)) { - JobStateTracker jt = new JobStateTracker(id, zk, false, - job_trackingpath); - jt.create(); - } - } catch (Exception e) { - LOG.warn("Error tracking: " + e.getMessage()); - // If we couldn't create the tracker node, don't - // create the main node. - zk.delete(makeZnode(type, id), -1); - } - } - if (zk.exists(makeZnode(type, id), false) == null) - throw new IOException("Unable to create " + makeZnode(type, id)); - if (wasCreated) { - try { - saveField(type, id, "created", - Long.toString(System.currentTimeMillis())); - } catch (NotFoundException nfe) { - // Wow, something's really wrong. 
- throw new IOException("Couldn't write to node " + id, nfe); - } - } - } catch (KeeperException e) { - throw new IOException("Creating " + id, e); - } catch (InterruptedException e) { - throw new IOException("Creating " + id, e); - } - } - - /** - * Get the path based on the job type. - * - * @param type - */ - public String getPath(Type type) { - String typepath = overhead_path; - switch (type) { - case JOB: - typepath = job_path; - break; - case JOBTRACKING: - typepath = job_trackingpath; - break; - } - return typepath; - } - - public static String[] getPaths(String fullpath) { - ArrayList paths = new ArrayList(); - if (fullpath.length() < 2) { - paths.add(fullpath); - } else { - int location = 0; - while ((location = fullpath.indexOf("/", location + 1)) > 0) { - paths.add(fullpath.substring(0, location)); - } - paths.add(fullpath); - } - String[] strings = new String[paths.size()]; - return paths.toArray(strings); - } - - /** - * A helper method that sets a field value. - * @param type - * @param id - * @param name - * @param val - * @throws KeeperException - * @throws UnsupportedEncodingException - * @throws InterruptedException - */ - private void setFieldData(Type type, String id, String name, String val) - throws KeeperException, UnsupportedEncodingException, InterruptedException { - try { - zk.create(makeFieldZnode(type, id, name), - val.getBytes(ENCODING), - Ids.OPEN_ACL_UNSAFE, - CreateMode.PERSISTENT); - } catch (KeeperException.NodeExistsException e) { - zk.setData(makeFieldZnode(type, id, name), - val.getBytes(ENCODING), - -1); - } - } - - /** - * Make a ZK path to the named field. 
- */ - public String makeFieldZnode(Type type, String id, String name) { - return makeZnode(type, id) + "/" + name; - } - - /** - * Make a ZK path to job - */ - public String makeZnode(Type type, String id) { - return getPath(type) + "/" + id; - } - - @Override - public void saveField(Type type, String id, String key, String val) - throws NotFoundException { - try { - if (val != null) { - create(type, id); - setFieldData(type, id, key, val); - } - } catch (Exception e) { - throw new NotFoundException("Writing " + key + ": " + val + ", " - + e.getMessage()); - } - } - - @Override - public String getField(Type type, String id, String key) { - try { - byte[] b = zk.getData(makeFieldZnode(type, id, key), false, null); - return new String(b, ENCODING); - } catch (Exception e) { - return null; - } - } - - @Override - public Map getFields(Type type, String id) { - HashMap map = new HashMap(); - try { - for (String node : zk.getChildren(makeZnode(type, id), false)) { - byte[] b = zk.getData(makeFieldZnode(type, id, node), - false, null); - map.put(node, new String(b, ENCODING)); - } - } catch (Exception e) { - return map; - } - return map; - } - - @Override - public boolean delete(Type type, String id) throws NotFoundException { - try { - for (String child : zk.getChildren(makeZnode(type, id), false)) { - try { - zk.delete(makeFieldZnode(type, id, child), -1); - } catch (Exception e) { - // Other nodes may be trying to delete this at the same time, - // so just log errors and skip them. - throw new NotFoundException("Couldn't delete " + - makeFieldZnode(type, id, child)); - } - } - try { - zk.delete(makeZnode(type, id), -1); - } catch (Exception e) { - // Same thing -- might be deleted by other nodes, so just go on. 
- throw new NotFoundException("Couldn't delete " + - makeZnode(type, id)); - } - } catch (Exception e) { - // Error getting children of node -- probably node has been deleted - throw new NotFoundException("Couldn't get children of " + - makeZnode(type, id)); - } - return true; - } - - @Override - public List getAll() { - ArrayList allNodes = new ArrayList(); - for (Type type : Type.values()) { - allNodes.addAll(getAllForType(type)); - } - return allNodes; - } - - @Override - public List getAllForType(Type type) { - try { - return zk.getChildren(getPath(type), false); - } catch (Exception e) { - return new ArrayList(); - } - } - - @Override - public List getAllForKey(String key, String value) { - ArrayList allNodes = new ArrayList(); - try { - for (Type type : Type.values()) { - allNodes.addAll(getAllForTypeAndKey(type, key, value)); - } - } catch (Exception e) { - LOG.info("Couldn't find children."); - } - return allNodes; - } - - @Override - public List getAllForTypeAndKey(Type type, String key, String value) { - ArrayList allNodes = new ArrayList(); - try { - for (String id : zk.getChildren(getPath(type), false)) { - for (String field : zk.getChildren(id, false)) { - if (field.endsWith("/" + key)) { - byte[] b = zk.getData(field, false, null); - if (new String(b, ENCODING).equals(value)) { - allNodes.add(id); - } - } - } - } - } catch (Exception e) { - // Log and go to the next type -- this one might not exist - LOG.info("Couldn't find children of " + getPath(type)); - } - return allNodes; - } - - @Override - public void openStorage(Configuration config) throws IOException { - storage_root = config.get(STORAGE_ROOT); - job_path = storage_root + "/jobs"; - job_trackingpath = storage_root + TRACKINGDIR; - overhead_path = storage_root + "/overhead"; - - if (zk == null) { - zk = zkOpen(config); - } - } - - @Override - public void closeStorage() throws IOException { - close(); - } -} diff --git 
hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/AppConfig.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/AppConfig.java new file mode 100644 index 0000000..3d82f69 --- /dev/null +++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/AppConfig.java @@ -0,0 +1,223 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.templeton; + +import java.io.File; +import java.net.URL; +import java.util.Map; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.util.VersionInfo; +import org.apache.hive.hcatalog.templeton.tool.JobState; +import org.apache.hive.hcatalog.templeton.tool.ZooKeeperCleanup; +import org.apache.hive.hcatalog.templeton.tool.ZooKeeperStorage; + +/** + * The configuration for Templeton. This merges the normal Hadoop + * configuration with the Templeton specific variables. + * + * The Templeton configuration variables are described in + * templeton-default.xml + * + * The Templeton specific configuration is split into two layers + * + * 1. 
webhcat-default.xml - All the configuration variables that + * Templeton needs. These are the defaults that ship with the app + * and should only be changed by the app developers. + * + * 2. webhcat-site.xml - The (possibly empty) configuration that the + * system administrator can set variables for their Hadoop cluster. + * + * The configuration files are loaded in this order with later files + * overriding earlier ones. + * + * To find the configuration files, we first attempt to load a file + * from the CLASSPATH and then look in the directory specified in the + * TEMPLETON_HOME environment variable. + * + * In addition the configuration files may access the special env + * variable env for all environment variables. For example, the + * hadoop executable could be specified using: + *
+ *      ${env.HADOOP_PREFIX}/bin/hadoop
+ *
+ */ +public class AppConfig extends Configuration { + public static final String[] HADOOP_CONF_FILENAMES = { + "core-default.xml", "core-site.xml", "mapred-default.xml", "mapred-site.xml", "hdfs-site.xml" + }; + + public static final String[] HADOOP_PREFIX_VARS = { + "HADOOP_PREFIX", "HADOOP_HOME" + }; + + public static final String TEMPLETON_HOME_VAR = "TEMPLETON_HOME"; + + public static final String[] TEMPLETON_CONF_FILENAMES = { + "webhcat-default.xml", + "webhcat-site.xml" + }; + + public static final String PORT = "templeton.port"; + public static final String EXEC_ENCODING_NAME = "templeton.exec.encoding"; + public static final String EXEC_ENVS_NAME = "templeton.exec.envs"; + public static final String EXEC_MAX_BYTES_NAME = "templeton.exec.max-output-bytes"; + public static final String EXEC_MAX_PROCS_NAME = "templeton.exec.max-procs"; + public static final String EXEC_TIMEOUT_NAME = "templeton.exec.timeout"; + public static final String HADOOP_QUEUE_NAME = "templeton.hadoop.queue.name"; + public static final String HADOOP_NAME = "templeton.hadoop"; + public static final String HADOOP_CONF_DIR = "templeton.hadoop.conf.dir"; + public static final String HCAT_NAME = "templeton.hcat"; + public static final String HIVE_ARCHIVE_NAME = "templeton.hive.archive"; + public static final String HIVE_PATH_NAME = "templeton.hive.path"; + public static final String HIVE_PROPS_NAME = "templeton.hive.properties"; + public static final String LIB_JARS_NAME = "templeton.libjars"; + public static final String PIG_ARCHIVE_NAME = "templeton.pig.archive"; + public static final String PIG_PATH_NAME = "templeton.pig.path"; + public static final String STREAMING_JAR_NAME = "templeton.streaming.jar"; + public static final String TEMPLETON_JAR_NAME = "templeton.jar"; + public static final String OVERRIDE_JARS_NAME = "templeton.override.jars"; + public static final String OVERRIDE_JARS_ENABLED = "templeton.override.enabled"; + public static final String 
TEMPLETON_CONTROLLER_MR_CHILD_OPTS + = "templeton.controller.mr.child.opts"; + + public static final String KERBEROS_SECRET = "templeton.kerberos.secret"; + public static final String KERBEROS_PRINCIPAL = "templeton.kerberos.principal"; + public static final String KERBEROS_KEYTAB = "templeton.kerberos.keytab"; + + public static final String CALLBACK_INTERVAL_NAME + = "templeton.callback.retry.interval"; + public static final String CALLBACK_RETRY_NAME + = "templeton.callback.retry.attempts"; + + //Hadoop property names (set by templeton logic) + public static final String HADOOP_END_INTERVAL_NAME = "job.end.retry.interval"; + public static final String HADOOP_END_RETRY_NAME = "job.end.retry.attempts"; + public static final String HADOOP_END_URL_NAME = "job.end.notification.url"; + public static final String HADOOP_SPECULATIVE_NAME + = "mapred.map.tasks.speculative.execution"; + public static final String HADOOP_CHILD_JAVA_OPTS = "mapred.child.java.opts"; + public static final String UNIT_TEST_MODE = "templeton.unit.test.mode"; + + + private static final Log LOG = LogFactory.getLog(AppConfig.class); + + public AppConfig() { + init(); + LOG.info("Using Hadoop version " + VersionInfo.getVersion()); + } + + private void init() { + for (Map.Entry e : System.getenv().entrySet()) + set("env." + e.getKey(), e.getValue()); + + String templetonDir = getTempletonDir(); + for (String fname : TEMPLETON_CONF_FILENAMES) + if (! 
loadOneClasspathConfig(fname)) + loadOneFileConfig(templetonDir, fname); + + String hadoopConfDir = getHadoopConfDir(); + for (String fname : HADOOP_CONF_FILENAMES) + loadOneFileConfig(hadoopConfDir, fname); + ProxyUserSupport.processProxyuserConfig(this); + } + + public void startCleanup() { + JobState.getStorageInstance(this).startCleanup(this); + } + + public String getHadoopConfDir() { + return get(HADOOP_CONF_DIR); + } + + public static String getTempletonDir() { + return System.getenv(TEMPLETON_HOME_VAR); + } + + private boolean loadOneFileConfig(String dir, String fname) { + if (dir != null) { + File f = new File(dir, fname); + if (f.exists()) { + addResource(new Path(f.getAbsolutePath())); + LOG.debug("loaded config file " + f.getAbsolutePath()); + return true; + } + } + return false; + } + + private boolean loadOneClasspathConfig(String fname) { + URL x = getResource(fname); + if (x != null) { + addResource(x); + LOG.debug("loaded config from classpath " + x); + return true; + } + + return false; + } + + public String templetonJar() { return get(TEMPLETON_JAR_NAME); } + public String libJars() { return get(LIB_JARS_NAME); } + public String hadoopQueueName() { return get(HADOOP_QUEUE_NAME); } + public String clusterHadoop() { return get(HADOOP_NAME); } + public String clusterHcat() { return get(HCAT_NAME); } + public String pigPath() { return get(PIG_PATH_NAME); } + public String pigArchive() { return get(PIG_ARCHIVE_NAME); } + public String hivePath() { return get(HIVE_PATH_NAME); } + public String hiveArchive() { return get(HIVE_ARCHIVE_NAME); } + public String streamingJar() { return get(STREAMING_JAR_NAME); } + public String kerberosSecret() { return get(KERBEROS_SECRET); } + public String kerberosPrincipal(){ return get(KERBEROS_PRINCIPAL); } + public String kerberosKeytab() { return get(KERBEROS_KEYTAB); } + public String controllerMRChildOpts() { + return get(TEMPLETON_CONTROLLER_MR_CHILD_OPTS); + } + + + + public String[] overrideJars() { + if 
(getBoolean(OVERRIDE_JARS_ENABLED, true)) + return getStrings(OVERRIDE_JARS_NAME); + else + return null; + } + public String overrideJarsString() { + if (getBoolean(OVERRIDE_JARS_ENABLED, true)) + return get(OVERRIDE_JARS_NAME); + else + return null; + } + + public long zkCleanupInterval() { + return getLong(ZooKeeperCleanup.ZK_CLEANUP_INTERVAL, + (1000L * 60L * 60L * 12L)); + } + + public long zkMaxAge() { + return getLong(ZooKeeperCleanup.ZK_CLEANUP_MAX_AGE, + (1000L * 60L * 60L * 24L * 7L)); + } + + public String zkHosts() { return get(ZooKeeperStorage.ZK_HOSTS); } + public int zkSessionTimeout() { return getInt(ZooKeeperStorage.ZK_SESSION_TIMEOUT, 30000); } +} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/BadParam.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/BadParam.java new file mode 100644 index 0000000..d94356d --- /dev/null +++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/BadParam.java @@ -0,0 +1,30 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.templeton; + +import org.eclipse.jetty.http.HttpStatus; + +/** + * Missing required or badly configured parameter. 
+ */ +public class BadParam extends SimpleWebException { + public BadParam(String msg) { + super(HttpStatus.BAD_REQUEST_400, msg); + } +} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/BusyException.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/BusyException.java new file mode 100644 index 0000000..548c783 --- /dev/null +++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/BusyException.java @@ -0,0 +1,30 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.templeton; + +import org.eclipse.jetty.http.HttpStatus; + +/** + * Simple "we are busy, try again" exception. 
+ */ +public class BusyException extends SimpleWebException { + public BusyException() { + super(HttpStatus.SERVICE_UNAVAILABLE_503, "Busy, please retry"); + } +} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/CallbackFailedException.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/CallbackFailedException.java new file mode 100644 index 0000000..e73314f --- /dev/null +++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/CallbackFailedException.java @@ -0,0 +1,30 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.templeton; + +import org.eclipse.jetty.http.HttpStatus; + +/** + * The callback failed when it tried to reach the callback URL. 
+ */ +public class CallbackFailedException extends SimpleWebException { + public CallbackFailedException(String msg) { + super(HttpStatus.BAD_REQUEST_400, msg); + } +} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/CatchallExceptionMapper.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/CatchallExceptionMapper.java new file mode 100644 index 0000000..5201621 --- /dev/null +++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/CatchallExceptionMapper.java @@ -0,0 +1,47 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.templeton; + +import javax.ws.rs.core.Response; +import javax.ws.rs.ext.ExceptionMapper; +import javax.ws.rs.ext.Provider; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.eclipse.jetty.http.HttpStatus; + +import com.sun.jersey.api.NotFoundException; + +/** + * Map all exceptions to the Jersey response. This lets us have nice + * results in the error body. 
+ */ +@Provider +public class CatchallExceptionMapper + implements ExceptionMapper { + private static final Log LOG = LogFactory.getLog(CatchallExceptionMapper.class); + + public Response toResponse(Exception e) { + LOG.error(e.getMessage(), e); + if (e instanceof NotFoundException) { + return SimpleWebException.buildMessage(HttpStatus.NOT_FOUND_404, null, e.getMessage()); + } + return SimpleWebException.buildMessage(HttpStatus.INTERNAL_SERVER_ERROR_500, null, e.getMessage()); + } +} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/ColumnDesc.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/ColumnDesc.java new file mode 100644 index 0000000..99718ca --- /dev/null +++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/ColumnDesc.java @@ -0,0 +1,60 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.templeton; + +import javax.xml.bind.annotation.XmlRootElement; + +/** + * A description of the column to create. 
+ */ +@XmlRootElement +public class ColumnDesc extends GroupPermissionsDesc { + public String name; + public String type; + public String comment; + + public ColumnDesc() {} + + /** + * Create a new ColumnDesc + */ + public ColumnDesc(String name, String type, String comment) { + this.name = name; + this.type = type; + this.comment = comment; + } + + public String toString() { + return String.format("ColumnDesc(name=%s, type=%s, comment=%s)", + name, type, comment); + } + + public boolean equals(Object o) { + if (this == o) + return true; + if (! (o instanceof ColumnDesc)) + return false; + ColumnDesc that = (ColumnDesc) o; + return xequals(this.name, that.name) + && xequals(this.type, that.type) + && xequals(this.comment, that.comment) + && super.equals(that) + ; + } +} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/CompleteBean.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/CompleteBean.java new file mode 100644 index 0000000..3404529 --- /dev/null +++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/CompleteBean.java @@ -0,0 +1,37 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.hive.hcatalog.templeton; + +/** + * CompleteBean - The results of a CompleteDelegator run. + */ +public class CompleteBean { + public String status; + + public CompleteBean() {} + + /** + * Create a new CompleteBean + * + * @param status run status + */ + public CompleteBean(String status) { + this.status = status; + } +} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/CompleteDelegator.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/CompleteDelegator.java new file mode 100644 index 0000000..2ef57a4 --- /dev/null +++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/CompleteDelegator.java @@ -0,0 +1,113 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.templeton; + +import java.io.IOException; +import java.net.URL; +import java.net.MalformedURLException; +import java.util.Date; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hive.hcatalog.templeton.tool.JobState; +import org.apache.hive.hcatalog.templeton.tool.TempletonUtils; + +/** + * Complete a job. 
This will run the callback if + * + * - the job is done + * - there is a callback + * - the callback has not yet been called + * + * There is a small chance for a race condition if two callers run + * this at the same time. That should never happen. + * + * We use a Hadoop config var to notify this class on the completion + * of a job. Hadoop will call us multiple times in the event of + * failure. Even if the failure is that the client callback failed. + * + * See LauncherDelegator for the HADOOP_END_RETRY* vars that are set. + */ +public class CompleteDelegator extends TempletonDelegator { + private static final Log LOG = LogFactory.getLog(CompleteDelegator.class); + + public CompleteDelegator(AppConfig appConf) { + super(appConf); + } + + public CompleteBean run(String id) + throws CallbackFailedException, IOException { + if (id == null) + acceptWithError("No jobid given"); + + JobState state = null; + try { + state = new JobState(id, Main.getAppConfigInstance()); + if (state.getCompleteStatus() == null) + failed("Job not yet complete", null); + + Long notified = state.getNotifiedTime(); + if (notified != null) + return acceptWithError("Callback already run on " + + new Date(notified.longValue())); + + String callback = state.getCallback(); + if (callback == null) + return new CompleteBean("No callback registered"); + + try { + doCallback(state.getId(), callback); + } catch (Exception e) { + failed("Callback failed " + callback + " for " + id, e); + } + + state.setNotifiedTime(System.currentTimeMillis()); + return new CompleteBean("Callback sent"); + } finally { + if (state != null) + state.close(); + } + } + + /** + * Call the callback url with the jobid to let them know it's + * finished. If the url has the string $jobId in it, it will be + * replaced with the completed jobid. 
+ */ + public static void doCallback(String jobid, String url) + throws MalformedURLException, IOException { + if (url.contains("$jobId")) + url = url.replace("$jobId", jobid); + TempletonUtils.fetchUrl(new URL(url)); + } + + private void failed(String msg, Exception e) + throws CallbackFailedException { + if (e != null) + LOG.error(msg, e); + else + LOG.error(msg); + throw new CallbackFailedException(msg); + } + + private CompleteBean acceptWithError(String msg) { + LOG.error(msg); + return new CompleteBean(msg); + } +} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/DatabaseDesc.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/DatabaseDesc.java new file mode 100644 index 0000000..0e262a8 --- /dev/null +++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/DatabaseDesc.java @@ -0,0 +1,42 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.templeton; + +import java.util.Map; + +import javax.xml.bind.annotation.XmlRootElement; + +/** + * A description of the database to create. 
+ */ +@XmlRootElement +public class DatabaseDesc extends GroupPermissionsDesc { + public boolean ifNotExists; + public String database; + public String comment; + public String location; + public Map properties; + + public DatabaseDesc() {} + + public String toString() { + return String.format("DatabaseDesc(database=%s, comment=%s, location=%s, " + + "properties=%s)", database, comment, location, properties); + } +} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/DeleteDelegator.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/DeleteDelegator.java new file mode 100644 index 0000000..3593004 --- /dev/null +++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/DeleteDelegator.java @@ -0,0 +1,63 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.hive.hcatalog.templeton; + +import java.io.IOException; + +import org.apache.hadoop.hive.shims.HadoopShims.WebHCatJTShim; +import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.mapred.JobID; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hive.hcatalog.templeton.tool.JobState; + +/** + * Delete a job + */ +public class DeleteDelegator extends TempletonDelegator { + public DeleteDelegator(AppConfig appConf) { + super(appConf); + } + + public QueueStatusBean run(String user, String id) + throws NotAuthorizedException, BadParam, IOException, InterruptedException + { + UserGroupInformation ugi = UgiFactory.getUgi(user); + WebHCatJTShim tracker = null; + JobState state = null; + try { + tracker = ShimLoader.getHadoopShims().getWebHCatShim(appConf, ugi); + JobID jobid = StatusDelegator.StringToJobID(id); + if (jobid == null) + throw new BadParam("Invalid jobid: " + id); + tracker.killJob(jobid); + state = new JobState(id, Main.getAppConfigInstance()); + String childid = state.getChildId(); + if (childid != null) + tracker.killJob(StatusDelegator.StringToJobID(childid)); + return StatusDelegator.makeStatus(tracker, jobid, state); + } catch (IllegalStateException e) { + throw new BadParam(e.getMessage()); + } finally { + if (tracker != null) + tracker.close(); + if (state != null) + state.close(); + } + } +} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/EnqueueBean.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/EnqueueBean.java new file mode 100644 index 0000000..f08964e --- /dev/null +++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/EnqueueBean.java @@ -0,0 +1,37 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.templeton; + +/** + * EnqueueBean - The results of a call that enqueues a Hadoop job. + */ +public class EnqueueBean { + public String id; + + public EnqueueBean() {} + + /** + * Create a new EnqueueBean. + * + * @param id job id + */ + public EnqueueBean(String id) { + this.id = id; + } +} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/ExecBean.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/ExecBean.java new file mode 100644 index 0000000..c6c7b6e --- /dev/null +++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/ExecBean.java @@ -0,0 +1,48 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.templeton; + +/** + * ExecBean - The results of an exec call. + */ +public class ExecBean { + public String stdout; + public String stderr; + public int exitcode; + + public ExecBean() {} + + /** + * Create a new ExecBean. + * + * @param stdout standard output of the program. + * @param stderr error output of the program. + * @param exitcode exit code of the program. + */ + public ExecBean(String stdout, String stderr, int exitcode) { + this.stdout = stdout; + this.stderr = stderr; + this.exitcode = exitcode; + } + + public String toString() { + return String.format("ExecBean(stdout=%s, stderr=%s, exitcode=%s)", + stdout, stderr, exitcode); + } +} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/ExecService.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/ExecService.java new file mode 100644 index 0000000..5e8db93 --- /dev/null +++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/ExecService.java @@ -0,0 +1,35 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.hive.hcatalog.templeton; + +import java.io.IOException; +import java.util.List; +import java.util.Map; + +import org.apache.commons.exec.ExecuteException; + +public interface ExecService { + public ExecBean run(String program, List args, + Map env) + throws NotAuthorizedException, BusyException, ExecuteException, IOException; + + public ExecBean runUnlimited(String program, List args, + Map env) + throws NotAuthorizedException, ExecuteException, IOException; +} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/ExecServiceImpl.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/ExecServiceImpl.java new file mode 100644 index 0000000..23577d0 --- /dev/null +++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/ExecServiceImpl.java @@ -0,0 +1,194 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.hive.hcatalog.templeton; + +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.IOException; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.Semaphore; + +import org.apache.commons.exec.CommandLine; +import org.apache.commons.exec.DefaultExecutor; +import org.apache.commons.exec.ExecuteException; +import org.apache.commons.exec.ExecuteWatchdog; +import org.apache.commons.exec.PumpStreamHandler; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** + * Execute a local program. This is a singleton service that will + * execute programs as non-privileged users on the local box. See + * ExecService.run and ExecService.runUnlimited for details. + */ +public class ExecServiceImpl implements ExecService { + private static final Log LOG = LogFactory.getLog(ExecServiceImpl.class); + private static AppConfig appConf = Main.getAppConfigInstance(); + + private static volatile ExecServiceImpl theSingleton; + + /** + * Retrieve the singleton. + */ + public static synchronized ExecServiceImpl getInstance() { + if (theSingleton == null) { + theSingleton = new ExecServiceImpl(); + } + return theSingleton; + } + + private Semaphore avail; + + private ExecServiceImpl() { + avail = new Semaphore(appConf.getInt(AppConfig.EXEC_MAX_PROCS_NAME, 16)); + } + + /** + * Run the program synchronously as the given user. We rate limit + * the number of processes that can simultaneously created for + * this instance. + * + * @param program The program to run + * @param args Arguments to pass to the program + * @param env Any extra environment variables to set + * @return The result of the run. 
+ */ + public ExecBean run(String program, List args, + Map env) + throws NotAuthorizedException, BusyException, ExecuteException, IOException { + boolean aquired = false; + try { + aquired = avail.tryAcquire(); + if (aquired) { + return runUnlimited(program, args, env); + } else { + throw new BusyException(); + } + } finally { + if (aquired) { + avail.release(); + } + } + } + + /** + * Run the program synchronously as the given user. Warning: + * CommandLine will trim the argument strings. + * + * @param program The program to run. + * @param args Arguments to pass to the program + * @param env Any extra environment variables to set + * @return The result of the run. + */ + public ExecBean runUnlimited(String program, List args, + Map env) + throws NotAuthorizedException, ExecuteException, IOException { + try { + return auxRun(program, args, env); + } catch (IOException e) { + File cwd = new java.io.File("."); + if (cwd.canRead() && cwd.canWrite()) + throw e; + else + throw new IOException("Invalid permissions on Templeton directory: " + + cwd.getCanonicalPath()); + } + } + + private ExecBean auxRun(String program, List args, Map env) + throws NotAuthorizedException, ExecuteException, IOException { + DefaultExecutor executor = new DefaultExecutor(); + executor.setExitValues(null); + + // Setup stdout and stderr + int nbytes = appConf.getInt(AppConfig.EXEC_MAX_BYTES_NAME, -1); + ByteArrayOutputStream outStream = new MaxByteArrayOutputStream(nbytes); + ByteArrayOutputStream errStream = new MaxByteArrayOutputStream(nbytes); + executor.setStreamHandler(new PumpStreamHandler(outStream, errStream)); + + // Only run for N milliseconds + int timeout = appConf.getInt(AppConfig.EXEC_TIMEOUT_NAME, 0); + ExecuteWatchdog watchdog = new ExecuteWatchdog(timeout); + executor.setWatchdog(watchdog); + + CommandLine cmd = makeCommandLine(program, args); + + LOG.info("Running: " + cmd); + ExecBean res = new ExecBean(); + res.exitcode = executor.execute(cmd, execEnv(env)); + String 
enc = appConf.get(AppConfig.EXEC_ENCODING_NAME); + res.stdout = outStream.toString(enc); + res.stderr = errStream.toString(enc); + + return res; + } + + private CommandLine makeCommandLine(String program, + List args) + throws NotAuthorizedException, IOException { + String path = validateProgram(program); + CommandLine cmd = new CommandLine(path); + if (args != null) + for (String arg : args) + cmd.addArgument(arg, false); + + return cmd; + } + + /** + * Build the environment used for all exec calls. + * + * @return The environment variables. + */ + public Map execEnv(Map env) { + HashMap res = new HashMap(); + + for (String key : appConf.getStrings(AppConfig.EXEC_ENVS_NAME)) { + String val = System.getenv(key); + if (val != null) { + res.put(key, val); + } + } + if (env != null) + res.putAll(env); + for (Map.Entry envs : res.entrySet()) { + LOG.info("Env " + envs.getKey() + "=" + envs.getValue()); + } + return res; + } + + /** + * Given a program name, lookup the fully qualified path. Throws + * an exception if the program is missing or not authorized. + * + * @param path The path of the program. + * @return The path of the validated program. + */ + public String validateProgram(String path) + throws NotAuthorizedException, IOException { + File f = new File(path); + if (f.canExecute()) { + return f.getCanonicalPath(); + } else { + throw new NotAuthorizedException("Unable to access program: " + path); + } + } +} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/GroupPermissionsDesc.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/GroupPermissionsDesc.java new file mode 100644 index 0000000..6e9e521 --- /dev/null +++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/GroupPermissionsDesc.java @@ -0,0 +1,55 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.templeton; + +/** + * The base create permissions for ddl objects. + */ +public abstract class GroupPermissionsDesc { + public String group; + public String permissions; + + public GroupPermissionsDesc() {} + + protected static boolean xequals(Object a, Object b) { + if (a == null) { + if (b == null) + return true; + else + return false; + } + + return a.equals(b); + } + + protected static boolean xequals(boolean a, boolean b) { return a == b; } + protected static boolean xequals(int a, int b) { return a == b; } + protected static boolean xequals(char a, char b) { return a == b; } + + public boolean equals(Object o) { + if (this == o) + return true; + if (! 
(o instanceof GroupPermissionsDesc)) + return false; + GroupPermissionsDesc that = (GroupPermissionsDesc) o; + return xequals(this.group, that.group) + && xequals(this.permissions, that.permissions) + ; + } +} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/HcatDelegator.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/HcatDelegator.java new file mode 100644 index 0000000..c260787 --- /dev/null +++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/HcatDelegator.java @@ -0,0 +1,853 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.hive.hcatalog.templeton; + +import java.io.IOException; +import java.net.URI; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import javax.ws.rs.core.Response; + +import org.apache.commons.exec.ExecuteException; +import org.apache.commons.lang.StringUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.ErrorMsg; +import org.apache.hive.hcatalog.templeton.tool.TempletonUtils; +import org.eclipse.jetty.http.HttpStatus; + + +/** + * Run hcat on the local server using the ExecService. This is + * the backend of the ddl web service. + */ +public class HcatDelegator extends LauncherDelegator { + private static final Log LOG = LogFactory.getLog(HcatDelegator.class); + private ExecService execService; + + public HcatDelegator(AppConfig appConf, ExecService execService) { + super(appConf); + this.execService = execService; + } + + /** + * Run the local hcat executable. + */ + public ExecBean run(String user, String exec, boolean format, + String group, String permissions) + throws NotAuthorizedException, BusyException, ExecuteException, IOException { + SecureProxySupport proxy = new SecureProxySupport(); + try { + List args = makeArgs(exec, format, group, permissions); + proxy.open(user, appConf); + + // Setup the hadoop vars to specify the user. 
+ String cp = makeOverrideClasspath(appConf); + Map env = TempletonUtils.hadoopUserEnv(user, cp); + proxy.addEnv(env); + proxy.addArgs(args); + return execService.run(appConf.clusterHcat(), args, env); + } catch (InterruptedException e) { + throw new IOException(e); + } finally { + if (proxy != null) + proxy.close(); + } + } + + private List makeArgs(String exec, boolean format, + String group, String permissions) { + ArrayList args = new ArrayList(); + args.add("-e"); + args.add(exec); + if (TempletonUtils.isset(group)) { + args.add("-g"); + args.add(group); + } + if (TempletonUtils.isset(permissions)) { + args.add("-p"); + args.add(permissions); + } + if (format) { + args.add("-D"); + args.add("hive.ddl.output.format=json"); + // Use both args to ease development. Delete this one on + // May 1. + args.add("-D"); + args.add("hive.format=json"); + } + LOG.info("Main.getAppConfigInstance().get(AppConfig.UNIT_TEST_MODE)=" + + Main.getAppConfigInstance().get(AppConfig.UNIT_TEST_MODE)); + if(System.getProperty("hive.metastore.warehouse.dir") != null) { + /*when running in unit test mode, pass this property to HCat, + which will in turn pass it to Hive to make sure that Hive + tries to write to a directory that exists.*/ + args.add("-D"); + args.add("hive.metastore.warehouse.dir=" + System.getProperty("hive.metastore.warehouse.dir")); + } + return args; + } + + /** + * Return a json description of the database. + */ + public Response descDatabase(String user, String db, boolean extended) + throws HcatException, NotAuthorizedException, BusyException, + ExecuteException, IOException { + String exec = "desc database " + db + "; "; + if (extended) + exec = "desc database extended " + db + "; "; + + try { + String res = jsonRun(user, exec); + return JsonBuilder.create(res).build(); + } catch (HcatException e) { + throw new HcatException("unable to describe database: " + db, + e.execBean, exec); + } + } + + /** + * Return a json "show databases like". 
This will return a list of + * databases. + */ + public Response listDatabases(String user, String dbPattern) + throws HcatException, NotAuthorizedException, BusyException, + ExecuteException, IOException { + String exec = String.format("show databases like '%s';", dbPattern); + try { + String res = jsonRun(user, exec); + return JsonBuilder.create(res) + .build(); + } catch (HcatException e) { + throw new HcatException("unable to show databases for: " + dbPattern, + e.execBean, exec); + } + } + + /** + * Create a database with the given name + */ + public Response createDatabase(String user, DatabaseDesc desc) + throws HcatException, NotAuthorizedException, BusyException, + ExecuteException, IOException { + String exec = "create database"; + if (desc.ifNotExists) + exec += " if not exists"; + exec += " " + desc.database; + if (TempletonUtils.isset(desc.comment)) + exec += String.format(" comment '%s'", desc.comment); + if (TempletonUtils.isset(desc.location)) + exec += String.format(" location '%s'", desc.location); + if (TempletonUtils.isset(desc.properties)) + exec += String.format(" with dbproperties (%s)", + makePropertiesStatement(desc.properties)); + exec += ";"; + + String res = jsonRun(user, exec, desc.group, desc.permissions); + return JsonBuilder.create(res) + .put("database", desc.database) + .build(); + } + + /** + * Drop the given database + */ + public Response dropDatabase(String user, String db, + boolean ifExists, String option, + String group, String permissions) + throws HcatException, NotAuthorizedException, BusyException, + ExecuteException, IOException { + String exec = "drop database"; + if (ifExists) + exec += " if exists"; + exec += " " + db; + if (TempletonUtils.isset(option)) + exec += " " + option; + exec += ";"; + + String res = jsonRun(user, exec, group, permissions); + return JsonBuilder.create(res) + .put("database", db) + .build(); + } + + /** + * Create a table. 
+ */ + public Response createTable(String user, String db, TableDesc desc) + throws HcatException, NotAuthorizedException, BusyException, + ExecuteException, IOException { + String exec = makeCreateTable(db, desc); + + try { + String res = jsonRun(user, exec, desc.group, desc.permissions, true); + + return JsonBuilder.create(res) + .put("database", db) + .put("table", desc.table) + .build(); + } catch (final HcatException e) { + throw new HcatException("unable to create table: " + desc.table, + e.execBean, exec); + } + } + + /** + * Create a table like another. + */ + public Response createTableLike(String user, String db, TableLikeDesc desc) + throws HcatException, NotAuthorizedException, BusyException, + ExecuteException, IOException { + String exec = String.format("use %s; create", db); + + if (desc.external) + exec += " external"; + exec += String.format(" table %s like %s", desc.newTable, desc.existingTable); + if (TempletonUtils.isset(desc.location)) + exec += String.format(" location '%s'", desc.location); + exec += ";"; + + try { + String res = jsonRun(user, exec, desc.group, desc.permissions, true); + + return JsonBuilder.create(res) + .put("database", db) + .put("table", desc.newTable) + .build(); + } catch (final HcatException e) { + throw new HcatException("unable to create table: " + desc.newTable, + e.execBean, exec); + } + } + + /** + * Return a json description of the table. 
+ */ + public Response descTable(String user, String db, String table, boolean extended) + throws HcatException, NotAuthorizedException, BusyException, + ExecuteException, IOException { + String exec = "use " + db + "; "; + if (extended) + exec += "desc extended " + table + "; "; + else + exec += "desc " + table + "; "; + try { + String res = jsonRun(user, exec); + return JsonBuilder.create(res) + .put("database", db) + .put("table", table) + .build(); + } catch (HcatException e) { + throw new HcatException("unable to describe database: " + db, + e.execBean, exec); + } + } + + /** + * Return a json "show table like". This will return a list of + * tables. + */ + public Response listTables(String user, String db, String tablePattern) + throws HcatException, NotAuthorizedException, BusyException, + ExecuteException, IOException { + String exec = String.format("use %s; show tables like '%s';", + db, tablePattern); + try { + String res = jsonRun(user, exec); + return JsonBuilder.create(res) + .put("database", db) + .build(); + } catch (HcatException e) { + throw new HcatException("unable to show tables for: " + tablePattern, + e.execBean, exec); + } + } + + /** + * Return a json "show table extended like". This will return + * only the first single table. 
+ */ + public Response descExtendedTable(String user, String db, String table) + throws HcatException, NotAuthorizedException, BusyException, + ExecuteException, IOException { + String exec = String.format("use %s; show table extended like %s;", + db, table); + try { + String res = jsonRun(user, exec); + JsonBuilder jb = JsonBuilder.create(singleTable(res, table)) + .remove("tableName") + .put("database", db) + .put("table", table); + + // If we can get them from HDFS, add group and permission + String loc = (String) jb.getMap().get("location"); + if (loc != null && loc.startsWith("hdfs://")) { + try { + FileSystem fs = FileSystem.get(appConf); + FileStatus status = fs.getFileStatus(new Path(new URI(loc))); + jb.put("group", status.getGroup()); + jb.put("permission", status.getPermission().toString()); + } catch (Exception e) { + LOG.warn(e.getMessage() + " Couldn't get permissions for " + loc); + } + } + return jb.build(); + } catch (HcatException e) { + throw new HcatException("unable to show table: " + table, e.execBean, exec); + } + } + + // Format a list of Columns for a create statement + private String makeCols(List cols) { + ArrayList res = new ArrayList(); + for (ColumnDesc col : cols) + res.add(makeOneCol(col)); + return StringUtils.join(res, ", "); + } + + // Format a Column for a create statement + private String makeOneCol(ColumnDesc col) { + String res = String.format("%s %s", col.name, col.type); + if (TempletonUtils.isset(col.comment)) + res += String.format(" comment '%s'", col.comment); + return res; + } + + // Make a create table statement + private String makeCreateTable(String db, TableDesc desc) { + String exec = String.format("use %s; create", db); + + if (desc.external) + exec += " external"; + exec += " table"; + if (desc.ifNotExists) + exec += " if not exists"; + exec += " " + desc.table; + + if (TempletonUtils.isset(desc.columns)) + exec += String.format("(%s)", makeCols(desc.columns)); + if (TempletonUtils.isset(desc.comment)) + exec += 
String.format(" comment '%s'", desc.comment); + if (TempletonUtils.isset(desc.partitionedBy)) + exec += String.format(" partitioned by (%s)", makeCols(desc.partitionedBy)); + if (desc.clusteredBy != null) + exec += String.format(" clustered by %s", makeClusteredBy(desc.clusteredBy)); + if (desc.format != null) + exec += " " + makeStorageFormat(desc.format); + if (TempletonUtils.isset(desc.location)) + exec += String.format(" location '%s'", desc.location); + if (TempletonUtils.isset(desc.tableProperties)) + exec += String.format(" tblproperties (%s)", + makePropertiesStatement(desc.tableProperties)); + exec += ";"; + + return exec; + } + + // Format a clustered by statement + private String makeClusteredBy(TableDesc.ClusteredByDesc desc) { + String res = String.format("(%s)", StringUtils.join(desc.columnNames, ", ")); + if (TempletonUtils.isset(desc.sortedBy)) + res += String.format(" sorted by (%s)", makeClusterSortList(desc.sortedBy)); + res += String.format(" into %s buckets", desc.numberOfBuckets); + + return res; + } + + // Format a sorted by statement + private String makeClusterSortList(List descs) { + ArrayList res = new ArrayList(); + for (TableDesc.ClusterSortOrderDesc desc : descs) + res.add(makeOneClusterSort(desc)); + return StringUtils.join(res, ", "); + } + + // Format a single cluster sort statement + private String makeOneClusterSort(TableDesc.ClusterSortOrderDesc desc) { + return String.format("%s %s", desc.columnName, desc.order.toString()); + } + + // Format the storage format statements + private String makeStorageFormat(TableDesc.StorageFormatDesc desc) { + String res = ""; + + if (desc.rowFormat != null) + res += makeRowFormat(desc.rowFormat); + if (TempletonUtils.isset(desc.storedAs)) + res += String.format(" stored as %s", desc.storedAs); + if (desc.storedBy != null) + res += " " + makeStoredBy(desc.storedBy); + + return res; + } + + // Format the row format statement + private String makeRowFormat(TableDesc.RowFormatDesc desc) { + String 
res = + makeTermBy(desc.fieldsTerminatedBy, "fields") + + makeTermBy(desc.collectionItemsTerminatedBy, "collection items") + + makeTermBy(desc.mapKeysTerminatedBy, "map keys") + + makeTermBy(desc.linesTerminatedBy, "lines"); + + if (TempletonUtils.isset(res)) + return "row format delimited" + res; + else if (desc.serde != null) + return makeSerdeFormat(desc.serde); + else + return ""; + } + + // A row format terminated by clause + private String makeTermBy(String sep, String fieldName) { + + if (TempletonUtils.isset(sep)) + return String.format(" %s terminated by '%s'", fieldName, sep); + else + return ""; + } + + // Format the serde statement + private String makeSerdeFormat(TableDesc.SerdeDesc desc) { + String res = "row format serde " + desc.name; + if (TempletonUtils.isset(desc.properties)) + res += String.format(" with serdeproperties (%s)", + makePropertiesStatement(desc.properties)); + return res; + } + + // Format the properties statement + private String makePropertiesStatement(Map properties) { + ArrayList res = new ArrayList(); + for (Map.Entry e : properties.entrySet()) + res.add(String.format("'%s'='%s'", e.getKey(), e.getValue())); + return StringUtils.join(res, ", "); + } + + // Format the stored by statement + private String makeStoredBy(TableDesc.StoredByDesc desc) { + String res = String.format("stored by '%s'", desc.className); + if (TempletonUtils.isset(desc.properties)) + res += String.format(" with serdeproperties (%s)", + makePropertiesStatement(desc.properties)); + return res; + } + + // Pull out the first table from the "show extended" json. 
+ private String singleTable(String json, String table) + throws IOException { + Map obj = JsonBuilder.jsonToMap(json); + if (JsonBuilder.isError(obj)) + return json; + + List tables = (List) obj.get("tables"); + if (TempletonUtils.isset(tables)) + return JsonBuilder.mapToJson(tables.get(0)); + else { + return JsonBuilder + .createError(ErrorMsg.INVALID_TABLE.format(table), + ErrorMsg.INVALID_TABLE.getErrorCode()). + buildJson(); + } + } + + /** + * Drop a table. + */ + public Response dropTable(String user, String db, + String table, boolean ifExists, + String group, String permissions) + throws HcatException, NotAuthorizedException, BusyException, + ExecuteException, IOException { + String exec = String.format("use %s; drop table", db); + if (ifExists) + exec += " if exists"; + exec += String.format(" %s;", table); + + try { + String res = jsonRun(user, exec, group, permissions, true); + return JsonBuilder.create(res) + .put("database", db) + .put("table", table) + .build(); + } catch (HcatException e) { + throw new HcatException("unable to drop table: " + table, e.execBean, exec); + } + } + + /** + * Rename a table. + */ + public Response renameTable(String user, String db, + String oldTable, String newTable, + String group, String permissions) + throws HcatException, NotAuthorizedException, BusyException, + ExecuteException, IOException { + String exec = String.format("use %s; alter table %s rename to %s;", + db, oldTable, newTable); + try { + String res = jsonRun(user, exec, group, permissions, true); + return JsonBuilder.create(res) + .put("database", db) + .put("table", newTable) + .build(); + } catch (HcatException e) { + throw new HcatException("unable to rename table: " + oldTable, + e.execBean, exec); + } + } + + /** + * Describe one table property. 
+ */ + public Response descTableProperty(String user, String db, + String table, String property) + throws HcatException, NotAuthorizedException, BusyException, + ExecuteException, IOException { + Response res = descTable(user, db, table, true); + if (res.getStatus() != HttpStatus.OK_200) + return res; + Map props = tableProperties(res.getEntity()); + Map found = null; + if (props != null) { + String value = (String) props.get(property); + if (value != null) { + found = new HashMap(); + found.put(property, value); + } + } + + return JsonBuilder.create() + .put("database", db) + .put("table", table) + .put("property", found) + .build(); + } + + /** + * List the table properties. + */ + public Response listTableProperties(String user, String db, String table) + throws HcatException, NotAuthorizedException, BusyException, + ExecuteException, IOException { + Response res = descTable(user, db, table, true); + if (res.getStatus() != HttpStatus.OK_200) + return res; + Map props = tableProperties(res.getEntity()); + return JsonBuilder.create() + .put("database", db) + .put("table", table) + .put("properties", props) + .build(); + } + + /** + * Add one table property. 
+ */ + public Response addOneTableProperty(String user, String db, String table, + TablePropertyDesc desc) + throws HcatException, NotAuthorizedException, BusyException, + ExecuteException, IOException { + String exec + = String.format("use %s; alter table %s set tblproperties ('%s'='%s');", + db, table, desc.name, desc.value); + try { + String res = jsonRun(user, exec, desc.group, desc.permissions, true); + return JsonBuilder.create(res) + .put("database", db) + .put("table", table) + .put("property", desc.name) + .build(); + } catch (HcatException e) { + throw new HcatException("unable to add table property: " + table, + e.execBean, exec); + } + } + + private Map tableProperties(Object extendedTable) { + if (!(extendedTable instanceof Map)) + return null; + Map m = (Map) extendedTable; + Map tableInfo = (Map) m.get("tableInfo"); + if (tableInfo == null) + return null; + + return (Map) tableInfo.get("parameters"); + } + + /** + * Return a json description of the partitions. + */ + public Response listPartitions(String user, String db, String table) + throws HcatException, NotAuthorizedException, BusyException, + ExecuteException, IOException { + String exec = "use " + db + "; "; + exec += "show partitions " + table + "; "; + try { + String res = jsonRun(user, exec); + return JsonBuilder.create(res) + .put("database", db) + .put("table", table) + .build(); + } catch (HcatException e) { + throw new HcatException("unable to show partitions for table: " + table, + e.execBean, exec); + } + } + + /** + * Return a json description of one partition. 
+ */ + public Response descOnePartition(String user, String db, String table, + String partition) + throws HcatException, NotAuthorizedException, BusyException, + ExecuteException, IOException { + String exec = "use " + db + "; "; + exec += "show table extended like " + table + + " partition (" + partition + "); "; + try { + String res = jsonRun(user, exec); + return JsonBuilder.create(singleTable(res, table)) + .remove("tableName") + .put("database", db) + .put("table", table) + .put("partition", partition) + .build(); + } catch (HcatException e) { + if (e.execBean.stderr.contains("SemanticException") && + e.execBean.stderr.contains("Partition not found")) { + String emsg = "Partition " + partition + " for table " + + table + " does not exist" + db + "." + table + " does not exist"; + return JsonBuilder.create() + .put("error", emsg) + //this error should really be produced by Hive (DDLTask) + .put("errorCode", ErrorMsg.INVALID_PARTITION.getErrorCode()) + .put("database", db) + .put("table", table) + .put("partition", partition) + .build(); + } + + throw new HcatException("unable to show partition: " + + table + " " + partition, + e.execBean, + exec); + } + } + + /** + * Add one partition. + */ + public Response addOnePartition(String user, String db, String table, + PartitionDesc desc) + throws HcatException, NotAuthorizedException, BusyException, + ExecuteException, IOException { + String exec = String.format("use %s; alter table %s add", db, table); + if (desc.ifNotExists) + exec += " if not exists"; + exec += String.format(" partition (%s)", desc.partition); + if (TempletonUtils.isset(desc.location)) + exec += String.format(" location '%s'", desc.location); + exec += ";"; + try { + String res = jsonRun(user, exec, desc.group, desc.permissions, true); + if (res.indexOf("AlreadyExistsException") > -1) { + return JsonBuilder.create(). 
+ put("error", "Partition already exists") + //This error code should really be produced by Hive + .put("errorCode", ErrorMsg.PARTITION_EXISTS.getErrorCode()) + .put("database", db) + .put("table", table) + .put("partition", desc.partition).build(); + } + return JsonBuilder.create(res) + .put("database", db) + .put("table", table) + .put("partition", desc.partition) + .build(); + } catch (HcatException e) { + throw new HcatException("unable to add partition: " + desc, + e.execBean, exec); + } + } + + /** + * Drop a partition. + */ + public Response dropPartition(String user, String db, + String table, String partition, boolean ifExists, + String group, String permissions) + throws HcatException, NotAuthorizedException, BusyException, + ExecuteException, IOException { + String exec = String.format("use %s; alter table %s drop", db, table); + if (ifExists) + exec += " if exists"; + exec += String.format(" partition (%s);", partition); + + try { + String res = jsonRun(user, exec, group, permissions, true); + return JsonBuilder.create(res) + .put("database", db) + .put("table", table) + .put("partition", partition) + .build(); + } catch (HcatException e) { + throw new HcatException("unable to drop partition: " + partition, + e.execBean, exec); + } + } + + /** + * Return a json description of the columns. Same as + * describeTable. + */ + public Response listColumns(String user, String db, String table) + throws HcatException, NotAuthorizedException, BusyException, + ExecuteException, IOException { + try { + return descTable(user, db, table, false); + } catch (HcatException e) { + throw new HcatException("unable to show columns for table: " + table, + e.execBean, e.statement); + } + } + + /** + * Return a json description of one column. 
+ */ + public Response descOneColumn(String user, String db, String table, String column) + throws SimpleWebException, NotAuthorizedException, BusyException, + ExecuteException, IOException { + Response res = listColumns(user, db, table); + if (res.getStatus() != HttpStatus.OK_200) + return res; + + Object o = res.getEntity(); + final Map fields = (o != null && (o instanceof Map)) ? (Map) o : null; + if (fields == null) + throw new SimpleWebException(HttpStatus.INTERNAL_SERVER_ERROR_500, "Internal error, unable to find column " + + column); + + + List cols = (List) fields.get("columns"); + Map found = null; + if (cols != null) { + for (Map col : cols) { + if (column.equals(col.get("name"))) { + found = col; + break; + } + } + } + if (found == null) + throw new SimpleWebException(HttpStatus.INTERNAL_SERVER_ERROR_500, "unable to find column " + column, + new HashMap() { + { + put("description", fields); + } + }); + fields.remove("columns"); + fields.put("column", found); + return Response.fromResponse(res).entity(fields).build(); + } + + /** + * Add one column. 
+ */ + public Response addOneColumn(String user, String db, String table, + ColumnDesc desc) + throws HcatException, NotAuthorizedException, BusyException, + ExecuteException, IOException { + String exec = String.format("use %s; alter table %s add columns (%s %s", + db, table, desc.name, desc.type); + if (TempletonUtils.isset(desc.comment)) + exec += String.format(" comment '%s'", desc.comment); + exec += ");"; + try { + String res = jsonRun(user, exec, desc.group, desc.permissions, true); + return JsonBuilder.create(res) + .put("database", db) + .put("table", table) + .put("column", desc.name) + .build(); + } catch (HcatException e) { + throw new HcatException("unable to add column: " + desc, + e.execBean, exec); + } + } + + // Check that the hcat result is valid and or has a valid json + // error + private boolean isValid(ExecBean eb, boolean requireEmptyOutput) { + if (eb == null) + return false; + + try { + Map m = JsonBuilder.jsonToMap(eb.stdout); + if (m.containsKey("error")) // This is a valid error message. + return true; + } catch (IOException e) { + return false; + } + + if (eb.exitcode != 0) + return false; + + if (requireEmptyOutput) + if (TempletonUtils.isset(eb.stdout)) + return false; + + return true; + } + + // Run an hcat expression and return just the json outout. + private String jsonRun(String user, String exec, + String group, String permissions, + boolean requireEmptyOutput) + throws HcatException, NotAuthorizedException, BusyException, + ExecuteException, IOException { + ExecBean res = run(user, exec, true, group, permissions); + + if (!isValid(res, requireEmptyOutput)) + throw new HcatException("Failure calling hcat: " + exec, res, exec); + + return res.stdout; + } + + // Run an hcat expression and return just the json outout. No + // permissions set. 
+  private String jsonRun(String user, String exec)
+    throws HcatException, NotAuthorizedException, BusyException,
+    ExecuteException, IOException {
+    // Delegates with null group and permissions.
+    return jsonRun(user, exec, null, null);
+  }
+
+  // Run an hcat expression and return just the json output.
+  // Delegates with requireEmptyOutput set to false.
+  private String jsonRun(String user, String exec,
+               String group, String permissions)
+    throws HcatException, NotAuthorizedException, BusyException,
+    ExecuteException, IOException {
+    return jsonRun(user, exec, group, permissions, false);
+  }
+}
diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/HcatException.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/HcatException.java
new file mode 100644
index 0000000..68ad1ad
--- /dev/null
+++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/HcatException.java
@@ -0,0 +1,42 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hive.hcatalog.templeton;
+
+import org.eclipse.jetty.http.HttpStatus;
+
+import java.util.HashMap;
+
+/**
+ * Unable to run hcat on the job.
+ *
+ * Always reported with HTTP status 500. The exec bean and the statement that was
+ * run are passed to the superclass in a map (keys "exec" and "statement") and are
+ * also kept in public fields so callers can re-wrap the exception.
+ */
+public class HcatException extends SimpleWebException {
+  // Result bean of the failed hcat invocation, as passed to the constructor.
+  public ExecBean execBean;
+  // The statement that was being executed, as passed to the constructor.
+  public String statement;
+
+  public HcatException(String msg, final ExecBean bean, final String statement) {
+    super(HttpStatus.INTERNAL_SERVER_ERROR_500, msg, new HashMap() {
+      {
+        put("exec", bean);
+        put("statement", statement);
+      }
+    });
+    execBean = bean;
+    this.statement = statement;
+  }
+}
diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/HiveDelegator.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/HiveDelegator.java
new file mode 100644
index 0000000..1e679b1
--- /dev/null
+++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/HiveDelegator.java
@@ -0,0 +1,116 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hive.hcatalog.templeton;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.net.URISyntaxException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.exec.ExecuteException;
+import org.apache.hive.hcatalog.templeton.tool.TempletonControllerJob;
+import org.apache.hive.hcatalog.templeton.tool.TempletonUtils;
+
+/**
+ * Submit a Hive job.
+ *
+ * This is the backend of the hive web service.
+ */
+public class HiveDelegator extends LauncherDelegator {
+
+  public HiveDelegator(AppConfig appConf) {
+    super(appConf);
+  }
+
+  // Record the submitting user in runAs, build the controller-job argument list and
+  // enqueue it. Exactly one of execute (inline statement) or srcFile (script file) is
+  // used; execute wins when both are set (see makeArgs).
+  public EnqueueBean run(String user,
+               String execute, String srcFile, List defines,
+               String statusdir, String callback, String completedUrl)
+    throws NotAuthorizedException, BadParam, BusyException, QueueException,
+    ExecuteException, IOException, InterruptedException
+  {
+    runAs = user;
+    List args = makeArgs(execute, srcFile, defines, statusdir,
+      completedUrl);
+
+    return enqueueController(user, callback, args);
+  }
+
+  // Assemble: launcher args, "--", the hive executable, "--service cli", the token file
+  // placeholder, configured and caller-supplied --hiveconf entries, then -e or -f.
+  // File-not-found and URI syntax problems are reported to the caller as BadParam.
+  private List makeArgs(String execute, String srcFile,
+             List defines, String statusdir, String completedUrl)
+    throws BadParam, IOException, InterruptedException
+  {
+    ArrayList args = new ArrayList();
+    try {
+      args.addAll(makeBasicArgs(execute, srcFile, statusdir, completedUrl));
+      args.add("--");
+      args.add(appConf.hivePath());
+
+      args.add("--service");
+      args.add("cli");
+
+      //the token file location as initial hiveconf arg
+      args.add("--hiveconf");
+      args.add(TempletonControllerJob.TOKEN_FILE_ARG_PLACEHOLDER);
+
+      // NOTE(review): neither getStrings(...) nor defines is null-checked before being
+      // iterated - confirm both are always non-null at this point.
+      for (String prop : appConf.getStrings(AppConfig.HIVE_PROPS_NAME)) {
+        args.add("--hiveconf");
+        args.add(prop);
+      }
+      for (String prop : defines) {
+        args.add("--hiveconf");
+        args.add(prop);
+      }
+      if (TempletonUtils.isset(execute)) {
+        args.add("-e");
+        args.add(execute);
+      } else if (TempletonUtils.isset(srcFile)) {
+        // Only the final path component is passed to -f.
+        args.add("-f");
+        args.add(TempletonUtils.hadoopFsPath(srcFile, appConf, runAs)
+          .getName());
+      }
+    } catch (FileNotFoundException e) {
+      throw new BadParam(e.getMessage());
+    } catch (URISyntaxException e) {
+      throw new BadParam(e.getMessage());
+    }
+
+    return args;
+  }
+
+  // Launcher arguments common to every Hive submission: the generic launcher args
+  // (with srcFile, when set, in the list of files handed to makeLauncherArgs) followed
+  // by "-archives" and the configured hive archive. The execute parameter is not used.
+  private List makeBasicArgs(String execute, String srcFile,
+                 String statusdir, String completedUrl)
+    throws URISyntaxException, FileNotFoundException, IOException,
+    InterruptedException
+  {
+    ArrayList args = new ArrayList();
+
+    ArrayList allFiles = new ArrayList();
+    if (TempletonUtils.isset(srcFile))
+      allFiles.add(TempletonUtils.hadoopFsFilename(srcFile, appConf,
+        runAs));
+
+    args.addAll(makeLauncherArgs(appConf, statusdir, completedUrl, allFiles));
+
+    args.add("-archives");
+    args.add(appConf.hiveArchive());
+
+    return args;
+  }
+}
diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/JarDelegator.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/JarDelegator.java
new file mode 100644
index 0000000..05d74cb
--- /dev/null
+++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/JarDelegator.java
@@ -0,0 +1,98 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hive.hcatalog.templeton;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.net.URISyntaxException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.exec.ExecuteException;
+import org.apache.hive.hcatalog.templeton.tool.TempletonControllerJob;
+import org.apache.hive.hcatalog.templeton.tool.TempletonUtils;
+
+/**
+ * Submit a job to the MapReduce queue.
+ *
+ * This is the backend of the mapreduce/jar web service.
+ */
+public class JarDelegator extends LauncherDelegator {
+  public JarDelegator(AppConfig appConf) {
+    super(appConf);
+  }
+
+  // Record the submitting user in runAs, build the controller-job argument list and
+  // enqueue it.
+  public EnqueueBean run(String user, String jar, String mainClass,
+               String libjars, String files,
+               List jarArgs, List defines,
+               String statusdir, String callback, String completedUrl)
+    throws NotAuthorizedException, BadParam, BusyException, QueueException,
+    ExecuteException, IOException, InterruptedException {
+    runAs = user;
+    List args = makeArgs(jar, mainClass,
+      libjars, files, jarArgs, defines,
+      statusdir, completedUrl);
+
+    return enqueueController(user, callback, args);
+  }
+
+  // Assemble: launcher args (the jar is in the list of files handed to makeLauncherArgs),
+  // "--", the cluster hadoop executable, "jar", the jar's file name, then the optional
+  // main class, -libjars and -files, the token file placeholder, the -D defines and
+  // finally the job's own arguments. File-not-found and URI syntax problems are
+  // reported to the caller as BadParam.
+  private List makeArgs(String jar, String mainClass,
+             String libjars, String files,
+             List jarArgs, List defines,
+             String statusdir, String completedUrl)
+    throws BadParam, IOException, InterruptedException {
+    ArrayList args = new ArrayList();
+    try {
+      ArrayList allFiles = new ArrayList();
+      allFiles.add(TempletonUtils.hadoopFsFilename(jar, appConf, runAs));
+
+      args.addAll(makeLauncherArgs(appConf, statusdir,
+        completedUrl, allFiles));
+      args.add("--");
+      args.add(appConf.clusterHadoop());
+      args.add("jar");
+      args.add(TempletonUtils.hadoopFsPath(jar, appConf, runAs).getName());
+      if (TempletonUtils.isset(mainClass))
+        args.add(mainClass);
+      if (TempletonUtils.isset(libjars)) {
+        args.add("-libjars");
+        args.add(TempletonUtils.hadoopFsListAsString(libjars, appConf,
+          runAs));
+      }
+      if (TempletonUtils.isset(files)) {
+        args.add("-files");
+        args.add(TempletonUtils.hadoopFsListAsString(files, appConf,
+          runAs));
+      }
+      //the token file location comes after mainClass, as a -Dprop=val
+      args.add("-D" + TempletonControllerJob.TOKEN_FILE_ARG_PLACEHOLDER);
+
+      // NOTE(review): defines and jarArgs are used without a null check - confirm the
+      // caller never passes null.
+      for (String d : defines)
+        args.add("-D" + d);
+
+      args.addAll(jarArgs);
+    } catch (FileNotFoundException e) {
+      throw new BadParam(e.getMessage());
+    } catch (URISyntaxException e) {
+      throw new BadParam(e.getMessage());
+    }
+
+    return args;
+  }
+}
diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/JsonBuilder.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/JsonBuilder.java
new file mode 100644
index 0000000..2a1fd75
--- /dev/null
+++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/JsonBuilder.java
@@ -0,0 +1,194 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hive.hcatalog.templeton;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+import javax.ws.rs.core.MediaType;
+import javax.ws.rs.core.Response;
+
+import org.apache.hadoop.hive.ql.ErrorMsg;
+import org.apache.hive.hcatalog.templeton.tool.TempletonUtils;
+import org.codehaus.jackson.map.ObjectMapper;
+import org.eclipse.jetty.http.HttpStatus;
+
+/**
+ * Helper class to build new json objects with new top level
+ * properties. Only add non-null entries.
+ */
+public class JsonBuilder {
+  // Hive error code -> HTTP status code; filled by the static initializer below and
+  // consulted by buildResponse().
+  private static final Map hiveError2HttpStatusCode = new HashMap();
+
+  /**
+   * It's expected that Hive (and thus HCat CLI) will return canonical error msgs/codes.
+   * Here they are mapped to appropriate HTTP Status Code.
+   * Generic errors map to 500, missing or invalid objects to 404 and duplicates or
+   * already-existing objects to 409.
+   */
+  static {
+    hiveError2HttpStatusCode.put(ErrorMsg.GENERIC_ERROR.getErrorCode(), HttpStatus.INTERNAL_SERVER_ERROR_500);
+    hiveError2HttpStatusCode.put(ErrorMsg.DATABASE_NOT_EXISTS.getErrorCode(), HttpStatus.NOT_FOUND_404);
+    hiveError2HttpStatusCode.put(ErrorMsg.INVALID_TABLE.getErrorCode(), HttpStatus.NOT_FOUND_404);
+    hiveError2HttpStatusCode.put(ErrorMsg.TABLE_NOT_PARTITIONED.getErrorCode(), HttpStatus.NOT_FOUND_404);
+    hiveError2HttpStatusCode.put(ErrorMsg.INVALID_PARTITION.getErrorCode(), HttpStatus.NOT_FOUND_404);
+
+    hiveError2HttpStatusCode.put(ErrorMsg.DUPLICATE_COLUMN_NAMES.getErrorCode(), HttpStatus.CONFLICT_409);
+    hiveError2HttpStatusCode.put(ErrorMsg.DATABSAE_ALREADY_EXISTS.getErrorCode(), HttpStatus.CONFLICT_409);
+    hiveError2HttpStatusCode.put(ErrorMsg.PARTITION_EXISTS.getErrorCode(), HttpStatus.CONFLICT_409);
+    hiveError2HttpStatusCode.put(ErrorMsg.TABLE_ALREADY_EXISTS.getErrorCode(), HttpStatus.CONFLICT_409);
+  }
+
+  // The map we're building.
+  private Map map;
+
+  // Parse the json map. The parsing is delegated to jsonToMap(json).
+  private JsonBuilder(String json)
+    throws IOException {
+    map = jsonToMap(json);
+  }
+
+  /**
+   * Create a new map object from the existing json.
+   */
+  public static JsonBuilder create(String json)
+    throws IOException {
+    return new JsonBuilder(json);
+  }
+
+  /**
+   * Create a new map object.
+   */
+  public static JsonBuilder create()
+    throws IOException {
+    return new JsonBuilder(null);
+  }
+
+  /**
+   * Create a new map error object.
+   * The message is stored under "error" and the code under "errorCode".
+   */
+  public static JsonBuilder createError(String msg, int errorCode)
+    throws IOException {
+    return new JsonBuilder(null)
+      .put("error", msg)
+      .put("errorCode", errorCode);
+  }
+
+  /**
+   * Add a non-null value to the map.
+   * A null value is silently skipped. Returns this builder so calls can be chained.
+   */
+  public JsonBuilder put(String name, Object val) {
+    if (val != null)
+      map.put(name, val);
+    return this;
+  }
+
+  /**
+   * Remove a value from the map.
+   * Returns this builder so calls can be chained.
+   */
+  public JsonBuilder remove(String name) {
+    map.remove(name);
+    return this;
+  }
+
+  /**
+   * Get the underlying map.
+   * This is the builder's own map, not a copy.
+   */
+  public Map getMap() {
+    return map;
+  }
+
+  /**
+   * Turn the map back to response object.
+   * Same as buildResponse().
+   */
+  public Response build() {
+    return buildResponse();
+  }
+
+  /**
+   * Turn the map back to json.
+   */
+  public String buildJson()
+    throws IOException {
+    return mapToJson(map);
+  }
+
+  /**
+   * Turn the map back to response object.
+   * The status is 200 unless the map has an "error" key, in which case it is 500;
+   * an "errorCode" value found in hiveError2HttpStatusCode overrides either.
+   */
+  public Response buildResponse() {
+    int status = HttpStatus.OK_200; // Server ok.
+    if (map.containsKey("error"))
+      status = HttpStatus.INTERNAL_SERVER_ERROR_500; // Generic http server error.
+    Object o = map.get("errorCode");
+    if (o != null) {
+      if(hiveError2HttpStatusCode.containsKey(o)) {
+        status = hiveError2HttpStatusCode.get(o);
+      }
+    }
+    return buildResponse(status);
+  }
+
+  /**
+   * Turn the map back to response object.
+   * Uses the given status, the map as the entity and the JSON media type.
+   */
+  public Response buildResponse(int status) {
+    return Response.status(status)
+      .entity(map)
+      .type(MediaType.APPLICATION_JSON)
+      .build();
+  }
+
+  /**
+   * Is the object non-empty?
+   */
+  public boolean isset() {
+    return TempletonUtils.isset(map);
+  }
+
+  /**
+   * Check if this is an error doc.
+   * True when the map is non-null and has an "error" key.
+   */
+  public static boolean isError(Map obj) {
+    return (obj != null) && obj.containsKey("error");
+  }
+
+  /**
+   * Convert a json string to a Map.
+   * A string for which TempletonUtils.isset is false yields a new, empty HashMap.
+   */
+  public static Map jsonToMap(String json)
+    throws IOException {
+    if (!TempletonUtils.isset(json))
+      return new HashMap();
+    else {
+      ObjectMapper mapper = new ObjectMapper();
+      return mapper.readValue(json, Map.class);
+    }
+  }
+
+  /**
+   * Convert a map to a json string.
+   *
+   * The object is serialized by Jackson into an in-memory buffer. Jackson encodes what
+   * it writes to an output stream as UTF-8, so the buffer is decoded as UTF-8 as well
+   * rather than with the platform default charset.
+   *
+   * @param obj the object to serialize (buildJson() passes the builder's map)
+   * @return the JSON text
+   * @throws IOException if serialization fails
+   */
+  public static String mapToJson(Object obj)
+    throws IOException {
+    ObjectMapper mapper = new ObjectMapper();
+    ByteArrayOutputStream out = new ByteArrayOutputStream();
+    mapper.writeValue(out, obj);
+    // UnsupportedEncodingException is an IOException, so the throws clause is unchanged.
+    return out.toString("UTF-8");
+  }
+}
diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/LauncherDelegator.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/LauncherDelegator.java
new file mode 100644
index 0000000..1fa6be4
--- /dev/null
+++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/LauncherDelegator.java
@@ -0,0 +1,198 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hive.hcatalog.templeton;
+
+import java.io.IOException;
+import java.security.PrivilegedExceptionAction;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.exec.ExecuteException;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.util.StringUtils;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.hive.hcatalog.templeton.tool.JobState;
+import org.apache.hive.hcatalog.templeton.tool.TempletonControllerJob;
+import org.apache.hive.hcatalog.templeton.tool.TempletonStorage;
+import org.apache.hive.hcatalog.templeton.tool.TempletonUtils;
+import org.apache.hive.hcatalog.templeton.tool.ZooKeeperStorage;
+
+/**
+ * The helper class for all the Templeton delegator classes that
+ * launch child jobs.
+ */
+public class LauncherDelegator extends TempletonDelegator {
+  private static final Log LOG = LogFactory.getLog(LauncherDelegator.class);
+  // User the job is submitted for; set by subclasses in run() and passed on as
+  // "user.name" by makeLauncherArgs.
+  protected String runAs = null;
+
+  public LauncherDelegator(AppConfig appConf) {
+    super(appConf);
+  }
+
+  // Store the owning user and the callback on the JobState for the given job id.
+  // The state is closed in the finally block whether or not the setters succeed.
+  public void registerJob(String id, String user, String callback)
+    throws IOException {
+    JobState state = null;
+    try {
+      state = new JobState(id, Main.getAppConfigInstance());
+      state.setUser(user);
+      state.setCallback(callback);
+    } finally {
+      if (state != null)
+        state.close();
+    }
+  }
+
+  /**
+   * Enqueue the TempletonControllerJob directly calling doAs.
+   *
+   * Submits the controller job as the given user, registers the resulting job id
+   * together with the user and callback, and returns the id wrapped in an EnqueueBean.
+   * A null job id or an interruption is reported as a QueueException.
+   */
+  public EnqueueBean enqueueController(String user, String callback,
+                     List args)
+    throws NotAuthorizedException, BusyException, ExecuteException,
+    IOException, QueueException {
+    try {
+      UserGroupInformation ugi = UgiFactory.getUgi(user);
+
+      final long startTime = System.nanoTime();
+
+      String id = queueAsUser(ugi, args);
+
+      // Nanoseconds to milliseconds, for the debug message only.
+      long elapsed = ((System.nanoTime() - startTime) / ((int) 1e6));
+      LOG.debug("queued job " + id + " in " + elapsed + " ms");
+
+      if (id == null)
+        throw new QueueException("Unable to get job id");
+
+      registerJob(id, user, callback);
+
+      return new EnqueueBean(id);
+    } catch (InterruptedException e) {
+      throw new QueueException("Unable to launch job " + e);
+    }
+  }
+
+  // Run a new TempletonControllerJob through ToolRunner inside ugi.doAs and return the
+  // id the controller reports via getSubmittedId().
+  private String queueAsUser(UserGroupInformation ugi, final List args)
+    throws IOException, InterruptedException {
+    String id = ugi.doAs(new PrivilegedExceptionAction() {
+      public String run() throws Exception {
+        String[] array = new String[args.size()];
+        TempletonControllerJob ctrl = new TempletonControllerJob();
+        ToolRunner.run(ctrl, args.toArray(array));
+        return ctrl.getSubmittedId();
+      }
+    });
+
+    return id;
+  }
+
+  // Build the argument list shared by all launched jobs: -libjars, optional -files for
+  // override jars, then a series of -D definitions. addDef skips any definition whose
+  // value is null.
+  public List makeLauncherArgs(AppConfig appConf, String statusdir,
+                 String completedUrl,
+                 List copyFiles) {
+    ArrayList args = new ArrayList();
+
+    args.add("-libjars");
+    args.add(appConf.libJars());
+    addCacheFiles(args, appConf);
+
+    // Hadoop vars
+    addDef(args, "user.name", runAs);
+    addDef(args, AppConfig.HADOOP_SPECULATIVE_NAME, "false");
+    addDef(args, AppConfig.HADOOP_CHILD_JAVA_OPTS, appConf.controllerMRChildOpts());
+
+    // Internal vars
+    addDef(args, TempletonControllerJob.STATUSDIR_NAME, statusdir);
+    addDef(args, TempletonControllerJob.COPY_NAME,
+      TempletonUtils.encodeArray(copyFiles));
+    addDef(args, TempletonControllerJob.OVERRIDE_CLASSPATH,
+      makeOverrideClasspath(appConf));
+
+    // Hadoop queue information
+    addDef(args, "mapred.job.queue.name", appConf.hadoopQueueName());
+
+    // Job vars
+    addStorageVars(args);
+    addCompletionVars(args, completedUrl);
+
+    return args;
+  }
+
+  // Storage vars: copy the storage class, storage root and ZooKeeper settings from the
+  // configuration into -D definitions under the same names.
+  private void addStorageVars(List args) {
+    addDef(args, TempletonStorage.STORAGE_CLASS,
+      appConf.get(TempletonStorage.STORAGE_CLASS));
+    addDef(args, TempletonStorage.STORAGE_ROOT,
+      appConf.get(TempletonStorage.STORAGE_ROOT));
+    addDef(args, ZooKeeperStorage.ZK_HOSTS,
+      appConf.get(ZooKeeperStorage.ZK_HOSTS));
+    addDef(args, ZooKeeperStorage.ZK_SESSION_TIMEOUT,
+      appConf.get(ZooKeeperStorage.ZK_SESSION_TIMEOUT));
+  }
+
+  // Completion notifier vars: the callback retry and interval settings are passed on
+  // under the HADOOP_END_* names, together with the completion URL.
+  private void addCompletionVars(List args, String completedUrl) {
+    addDef(args, AppConfig.HADOOP_END_RETRY_NAME,
+      appConf.get(AppConfig.CALLBACK_RETRY_NAME));
+    addDef(args, AppConfig.HADOOP_END_INTERVAL_NAME,
+      appConf.get(AppConfig.CALLBACK_INTERVAL_NAME));
+    addDef(args, AppConfig.HADOOP_END_URL_NAME, completedUrl);
+  }
+
+  /**
+   * Add files to the Distributed Cache for the controller job.
+   * Nothing is added when the configuration has no override jars string.
+   */
+  public static void addCacheFiles(List args, AppConfig appConf) {
+    String overrides = appConf.overrideJarsString();
+    if (overrides != null) {
+      args.add("-files");
+      args.add(overrides);
+    }
+  }
+
+  /**
+   * Create the override classpath, which will be added to
+   * HADOOP_CLASSPATH at runtime by the controller job.
+   * Returns null when there are no override jars; otherwise the file names (last
+   * path component only) joined with ":".
+   */
+  public static String makeOverrideClasspath(AppConfig appConf) {
+    String[] overrides = appConf.overrideJars();
+    if (overrides == null)
+      return null;
+
+    ArrayList cp = new ArrayList();
+    for (String fname : overrides) {
+      Path p = new Path(fname);
+      cp.add(p.getName());
+    }
+    return StringUtils.join(":", cp);
+  }
+
+
+  /**
+   * Add a Hadoop command line definition to args if the value is
+   * not null.
+   * The definition is appended as two arguments: "-D" followed by "name=val".
+   */
+  public static void addDef(List args, String name, String val) {
+    if (val != null) {
+      args.add("-D");
+      args.add(name + "=" + val);
+    }
+  }
+
+}
diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/ListDelegator.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/ListDelegator.java
new file mode 100644
index 0000000..8703862
--- /dev/null
+++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/ListDelegator.java
@@ -0,0 +1,75 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hive.hcatalog.templeton;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.ArrayList;
+
+import org.apache.hadoop.hive.shims.HadoopShims.WebHCatJTShim;
+import org.apache.hadoop.hive.shims.ShimLoader;
+import org.apache.hadoop.mapred.JobStatus;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hive.hcatalog.templeton.tool.JobState;
+
+/**
+ * List jobs owned by a user.
+ * When showall is true every job returned by the tracker is listed instead.
+ */
+public class ListDelegator extends TempletonDelegator {
+  public ListDelegator(AppConfig appConf) {
+    super(appConf);
+  }
+
+  // Return the ids of all jobs known to the tracker whose stored JobState user equals
+  // the given user, or all ids when showall is true. An IllegalStateException is
+  // reported to the caller as BadParam.
+  public List run(String user, boolean showall)
+    throws NotAuthorizedException, BadParam, IOException, InterruptedException {
+
+    UserGroupInformation ugi = UgiFactory.getUgi(user);
+    WebHCatJTShim tracker = null;
+    try {
+      tracker = ShimLoader.getHadoopShims().getWebHCatShim(appConf, ugi);
+
+      ArrayList ids = new ArrayList();
+
+      JobStatus[] jobs = tracker.getAllJobs();
+
+      if (jobs != null) {
+        for (JobStatus job : jobs) {
+          // One JobState is opened per job and closed again in the finally block.
+          JobState state = null;
+          try {
+            String id = job.getJobID().toString();
+            state = new JobState(id, Main.getAppConfigInstance());
+            if (showall || user.equals(state.getUser()))
+              ids.add(id);
+          } finally {
+            if (state != null) {
+              state.close();
+            }
+          }
+        }
+      }
+
+      return ids;
+    } catch (IllegalStateException e) {
+      throw new BadParam(e.getMessage());
+    } finally {
+      if (tracker != null)
+        tracker.close();
+    }
+  }
+}
diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/Main.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/Main.java
new file mode 100644
index 0000000..82532c6
--- /dev/null
+++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/Main.java
@@ -0,0 +1,252 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hive.hcatalog.templeton;
+
+import com.sun.jersey.api.core.PackagesResourceConfig;
+import com.sun.jersey.spi.container.servlet.ServletContainer;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hdfs.web.AuthFilter;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.util.GenericOptionsParser;
+import org.eclipse.jetty.rewrite.handler.RedirectPatternRule;
+import org.eclipse.jetty.rewrite.handler.RewriteHandler;
+import org.eclipse.jetty.server.Handler;
+import org.eclipse.jetty.server.Server;
+import org.eclipse.jetty.server.handler.HandlerList;
+import org.eclipse.jetty.servlet.FilterHolder;
+import org.eclipse.jetty.servlet.FilterMapping;
+import org.eclipse.jetty.servlet.ServletContextHandler;
+import org.eclipse.jetty.servlet.ServletHolder;
+import org.slf4j.bridge.SLF4JBridgeHandler;
+
+/**
+ * The main executable that starts up and runs the Server.
+ */
+public class Main {
+  // First path segment of every URL served by this server.
+  public static final String SERVLET_PATH = "templeton";
+  private static final Log LOG = LogFactory.getLog(Main.class);
+
+  // Port used when AppConfig.PORT is not configured.
+  public static final int DEFAULT_PORT = 8080;
+  // The Jetty server, set by runServer() once it has started.
+  private Server server;
+
+  // Process-wide configuration, assigned by init().
+  private static volatile AppConfig conf;
+
+  /**
+   * Retrieve the config singleton.
+   * Logs an error and returns null if init() has not loaded the configuration yet.
+   */
+  public static synchronized AppConfig getAppConfigInstance() {
+    if (conf == null)
+      LOG.error("Bug: configuration not yet loaded");
+    return conf;
+  }
+
+  Main(String[] args) {
+    init(args);
+  }
+
+  // Set up logging, load the configuration from the command line into the static
+  // conf field and start the configuration's cleanup.
+  public void init(String[] args) {
+    initLogger();
+    conf = loadConfig(args);
+    conf.startCleanup();
+    LOG.debug("Loaded conf " + conf);
+  }
+
+  // Jersey uses java.util.logging - bridge to slf4j. The existing handlers of the
+  // root logger are removed before the bridge is installed.
+  private void initLogger() {
+    java.util.logging.Logger rootLogger
+      = java.util.logging.LogManager.getLogManager().getLogger("");
+    for (java.util.logging.Handler h : rootLogger.getHandlers())
+      rootLogger.removeHandler(h);
+
+    SLF4JBridgeHandler.install();
+  }
+
+  // Create an AppConfig and apply the generic Hadoop options from args to it. Any
+  // left-over argument, or a parse failure, prints the usage text and exits.
+  public AppConfig loadConfig(String[] args) {
+    AppConfig cf = new AppConfig();
+    try {
+      GenericOptionsParser parser = new GenericOptionsParser(cf, args);
+      if (parser.getRemainingArgs().length > 0)
+        usage();
+    } catch (IOException e) {
+      // Pass the exception as well so the log keeps the stack trace.
+      LOG.error("Unable to parse options: " + e, e);
+      usage();
+    }
+
+    return cf;
+  }
+
+  // Print the usage text to stderr and exit with status 1.
+  public void usage() {
+    System.err.println("usage: templeton [-Dtempleton.port=N] [-D...]");
+    System.exit(1);
+  }
+
+  // Check the environment and start the server on the configured port (DEFAULT_PORT
+  // when unset). Any failure is reported on stderr and in the log, then the process
+  // exits with status 1.
+  public void run() {
+    int port = conf.getInt(AppConfig.PORT, DEFAULT_PORT);
+    try {
+      checkEnv();
+      runServer(port);
+      System.out.println("templeton: listening on port " + port);
+      LOG.info("Templeton listening on port " + port);
+    } catch (Exception e) {
+      System.err.println("templeton: Server failed to start: " + e.getMessage());
+      // Pass the exception as well so the log keeps the stack trace, as stop() does.
+      LOG.fatal("Server failed to start: " + e, e);
+      System.exit(1);
+    }
+  }
+  // Stop the Jetty server if one was started; a failure to stop is only logged.
+  void stop() {
+    if(server != null) {
+      try {
+        server.stop();
+      }
+      catch(Exception ex) {
+        LOG.warn("Failed to stop jetty.Server", ex);
+      }
+    }
+  }
+
+
+  private void checkEnv() {
+    checkCurrentDirPermissions();
+
+  }
+
+  // Exit with status 1 when the current working directory does not exist.
+  private void checkCurrentDirPermissions() {
+    //org.apache.commons.exec.DefaultExecutor requires
+    // that current directory exists
+    File pwd = new File(".");
+    if (!pwd.exists()) {
+      String msg = "Server failed to start: templeton: Current working directory '.' does not exist!";
+      System.err.println(msg);
+      LOG.fatal(msg);
+      System.exit(1);
+    }
+  }
+
+  // Log in from the keytab when security is enabled, build the Jetty server with the
+  // auth filter, the Jersey servlet and the redirects, start it and remember it in the
+  // server field. Returns the started server.
+  public Server runServer(int port)
+    throws Exception {
+
+    //Authenticate using keytab
+    if (UserGroupInformation.isSecurityEnabled()) {
+      UserGroupInformation.loginUserFromKeytab(conf.kerberosPrincipal(),
+        conf.kerberosKeytab());
+    }
+
+    // Create the Jetty server
+    Server server = new Server(port);
+    ServletContextHandler root = new ServletContextHandler(server, "/");
+
+    // Add the Auth filter
+    FilterHolder fHolder = makeAuthFilter();
+
+    /*
+     * We add filters for each of the URIs supported by templeton.
+     * If we added the entire sub-structure using '/*', the mapreduce
+     * notification cannot give the callback to templeton in secure mode.
+     * This is because mapreduce does not use secure credentials for
+     * callbacks. So jetty would fail the request as unauthorized.
+     */
+    root.addFilter(fHolder, "/" + SERVLET_PATH + "/v1/ddl/*",
+      FilterMapping.REQUEST);
+    root.addFilter(fHolder, "/" + SERVLET_PATH + "/v1/pig/*",
+      FilterMapping.REQUEST);
+    root.addFilter(fHolder, "/" + SERVLET_PATH + "/v1/hive/*",
+      FilterMapping.REQUEST);
+    root.addFilter(fHolder, "/" + SERVLET_PATH + "/v1/queue/*",
+      FilterMapping.REQUEST);
+    root.addFilter(fHolder, "/" + SERVLET_PATH + "/v1/mapreduce/*",
+      FilterMapping.REQUEST);
+    root.addFilter(fHolder, "/" + SERVLET_PATH + "/v1/status/*",
+      FilterMapping.REQUEST);
+    root.addFilter(fHolder, "/" + SERVLET_PATH + "/v1/version/*",
+      FilterMapping.REQUEST);
+
+    // Connect Jersey
+    ServletHolder h = new ServletHolder(new ServletContainer(makeJerseyConfig()));
+    root.addServlet(h, "/" + SERVLET_PATH + "/*");
+    // Add any redirects
+    addRedirects(server);
+
+    // Start the server
+    server.start();
+    this.server = server;
+    return server;
+  }
+
+  // Configure the AuthFilter with the Kerberos params iff security
+  // is enabled.
+ public FilterHolder makeAuthFilter() { + FilterHolder authFilter = new FilterHolder(AuthFilter.class); + if (UserGroupInformation.isSecurityEnabled()) { + authFilter.setInitParameter("dfs.web.authentication.signature.secret", + conf.kerberosSecret()); + authFilter.setInitParameter("dfs.web.authentication.kerberos.principal", + conf.kerberosPrincipal()); + authFilter.setInitParameter("dfs.web.authentication.kerberos.keytab", + conf.kerberosKeytab()); + } + return authFilter; + } + + public PackagesResourceConfig makeJerseyConfig() { + PackagesResourceConfig rc + = new PackagesResourceConfig("org.apache.hive.hcatalog.templeton"); + HashMap props = new HashMap(); + props.put("com.sun.jersey.api.json.POJOMappingFeature", "true"); + props.put("com.sun.jersey.config.property.WadlGeneratorConfig", + "org.apache.hive.hcatalog.templeton.WadlConfig"); + rc.setPropertiesAndFeatures(props); + + return rc; + } + + public void addRedirects(Server server) { + RewriteHandler rewrite = new RewriteHandler(); + + RedirectPatternRule redirect = new RedirectPatternRule(); + redirect.setPattern("/templeton/v1/application.wadl"); + redirect.setLocation("/templeton/application.wadl"); + rewrite.addRule(redirect); + + HandlerList handlerlist = new HandlerList(); + ArrayList handlers = new ArrayList(); + + // Any redirect handlers need to be added first + handlers.add(rewrite); + + // Now add all the default handlers + for (Handler handler : server.getHandlers()) { + handlers.add(handler); + } + Handler[] newlist = new Handler[handlers.size()]; + handlerlist.setHandlers(handlers.toArray(newlist)); + server.setHandler(handlerlist); + } + + public static void main(String[] args) { + Main templeton = new Main(args); + templeton.run(); + } +} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/MaxByteArrayOutputStream.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/MaxByteArrayOutputStream.java new file mode 100644 index 0000000..6f0371a 
--- /dev/null
+++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/MaxByteArrayOutputStream.java
@@ -0,0 +1,73 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hive.hcatalog.templeton;
+
+import java.io.ByteArrayOutputStream;
+
+/**
+ * An output stream that will only accept the first N bytes of data.
+ * Bytes offered once the buffer holds N bytes are silently discarded.
+ */
+public class MaxByteArrayOutputStream extends ByteArrayOutputStream {
+  /**
+   * The max number of bytes stored.
+   */
+  private final int maxBytes;
+
+  /**
+   * Create a stream that stores at most maxBytes bytes.
+   *
+   * @param maxBytes the max number of bytes stored.
+   */
+  public MaxByteArrayOutputStream(int maxBytes) {
+    this.maxBytes = maxBytes;
+  }
+
+  /**
+   * Writes the specified byte to this byte array output stream.
+   * Any bytes after the first maxBytes will be ignored.
+   *
+   * @param b the byte to be written.
+   */
+  @Override
+  public synchronized void write(int b) {
+    // count is the inherited number of valid bytes, so the limit
+    // stays correct after reset().
+    if (count < maxBytes) {
+      super.write(b);
+    }
+  }
+
+  /**
+   * Writes len bytes from the specified byte array
+   * starting at offset off to this byte array output stream.
+   * Any bytes after the first maxBytes will be ignored.
+   *
+   * @param b the data.
+   * @param off the start offset in the data.
+   * @param len the number of bytes to write.
+   */
+  @Override
+  public synchronized void write(byte b[], int off, int len) {
+    int storable = Math.min(maxBytes - count, len);
+    if (storable > 0) {
+      super.write(b, off, storable);
+    }
+  }
+}
diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/NotAuthorizedException.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/NotAuthorizedException.java
new file mode 100644
index 0000000..fc1c0e4
--- /dev/null
+++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/NotAuthorizedException.java
@@ -0,0 +1,30 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hive.hcatalog.templeton;
+
+import org.eclipse.jetty.http.HttpStatus;
+
+/**
+ * Simple "user not found" type exception.
+ */ +public class NotAuthorizedException extends SimpleWebException { + public NotAuthorizedException(String msg) { + super(HttpStatus.UNAUTHORIZED_401, msg); + } +} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/PartitionDesc.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/PartitionDesc.java new file mode 100644 index 0000000..6614f57 --- /dev/null +++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/PartitionDesc.java @@ -0,0 +1,38 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.templeton; + +import javax.xml.bind.annotation.XmlRootElement; + +/** + * A description of the partition to create. 
+ */ +@XmlRootElement +public class PartitionDesc extends GroupPermissionsDesc { + public String partition; + public String location; + public boolean ifNotExists = false; + + public PartitionDesc() {} + + public String toString() { + return String.format("PartitionDesc(partition=%s, location=%s, ifNotExists=%s)", + partition, location, ifNotExists); + } +} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/PigDelegator.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/PigDelegator.java new file mode 100644 index 0000000..b7c1823 --- /dev/null +++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/PigDelegator.java @@ -0,0 +1,97 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.templeton; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.net.URISyntaxException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.apache.commons.exec.ExecuteException; +import org.apache.hive.hcatalog.templeton.tool.TempletonControllerJob; +import org.apache.hive.hcatalog.templeton.tool.TempletonUtils; + +/** + * Submit a Pig job. 
+ * + * This is the backend of the pig web service. + */ +public class PigDelegator extends LauncherDelegator { + public PigDelegator(AppConfig appConf) { + super(appConf); + } + + public EnqueueBean run(String user, + String execute, String srcFile, + List pigArgs, String otherFiles, + String statusdir, String callback, String completedUrl) + throws NotAuthorizedException, BadParam, BusyException, QueueException, + ExecuteException, IOException, InterruptedException { + runAs = user; + List args = makeArgs(execute, + srcFile, pigArgs, + otherFiles, statusdir, completedUrl); + + return enqueueController(user, callback, args); + } + + private List makeArgs(String execute, String srcFile, + List pigArgs, String otherFiles, + String statusdir, String completedUrl) + throws BadParam, IOException, InterruptedException { + ArrayList args = new ArrayList(); + try { + ArrayList allFiles = new ArrayList(); + if (TempletonUtils.isset(srcFile)) + allFiles.add(TempletonUtils.hadoopFsFilename + (srcFile, appConf, runAs)); + if (TempletonUtils.isset(otherFiles)) { + String[] ofs = TempletonUtils.hadoopFsListAsArray(otherFiles, appConf, runAs); + allFiles.addAll(Arrays.asList(ofs)); + } + + args.addAll(makeLauncherArgs(appConf, statusdir, completedUrl, allFiles)); + args.add("-archives"); + args.add(appConf.pigArchive()); + + args.add("--"); + args.add(appConf.pigPath()); + //the token file location should be first argument of pig + args.add("-D" + TempletonControllerJob.TOKEN_FILE_ARG_PLACEHOLDER); + + args.addAll(pigArgs); + if (TempletonUtils.isset(execute)) { + args.add("-execute"); + args.add(execute); + } else if (TempletonUtils.isset(srcFile)) { + args.add("-file"); + args.add(TempletonUtils.hadoopFsPath(srcFile, appConf, runAs) + .getName()); + } + } catch (FileNotFoundException e) { + throw new BadParam(e.getMessage()); + } catch (URISyntaxException e) { + throw new BadParam(e.getMessage()); + } + + return args; + } +} diff --git 
hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/ProxyUserSupport.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/ProxyUserSupport.java
new file mode 100644
index 0000000..b215351
--- /dev/null
+++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/ProxyUserSupport.java
@@ -0,0 +1,283 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hive.hcatalog.templeton;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.security.Groups;
+
+import java.io.IOException;
+import java.net.InetAddress;
+import java.net.UnknownHostException;
+import java.text.MessageFormat;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * When WebHCat is run with doAs query parameter this class ensures that user making the
+ * call is allowed to impersonate doAs user and is making a call from authorized host.
+ */
+final class ProxyUserSupport {
+  private static final Log LOG = LogFactory.getLog(ProxyUserSupport.class);
+  private static final String CONF_PROXYUSER_PREFIX = "webhcat.proxyuser.";
+  private static final String CONF_GROUPS_SUFFIX = ".groups";
+  private static final String CONF_HOSTS_SUFFIX = ".hosts";
+  private static final Set<String> WILD_CARD = Collections.unmodifiableSet(new HashSet<String>(0));
+  private static final Map<String, Set<String>> proxyUserGroups = new HashMap<String, Set<String>>();
+  private static final Map<String, Set<String>> proxyUserHosts = new HashMap<String, Set<String>>();
+
+  /**
+   * Reads every webhcat.proxyuser.#USER#.groups and webhcat.proxyuser.#USER#.hosts entry
+   * from the configuration and records the groups/hosts each proxy user is authorized for.
+   * A value of "*" authorizes everything; other values are comma separated lists whose
+   * entries are trimmed, with empty entries ignored.
+   *
+   * @param conf configuration to scan.
+   */
+  static void processProxyuserConfig(AppConfig conf) {
+    for(Map.Entry<String, String> confEnt : conf) {
+      String value = confEnt.getValue();
+      String groupsUser = extractProxyUser(confEnt.getKey(), CONF_GROUPS_SUFFIX);
+      String hostsUser = extractProxyUser(confEnt.getKey(), CONF_HOSTS_SUFFIX);
+      if(groupsUser != null) {
+        //process user groups for which doAs is authorized
+        Set<String> groups;
+        if("*".equals(value)) {
+          groups = WILD_CARD;
+          if(LOG.isDebugEnabled()) {
+            LOG.debug("User [" + groupsUser + "] is authorized to do doAs any user.");
+          }
+        }
+        else if(value != null && value.trim().length() > 0) {
+          groups = splitTrimmed(value);
+          if(LOG.isDebugEnabled()) {
+            LOG.debug("User [" + groupsUser +
+              "] is authorized to do doAs for users in the following groups: [" +
+              value.trim() + "]");
+          }
+        }
+        else {
+          groups = Collections.emptySet();
+          if(LOG.isDebugEnabled()) {
+            LOG.debug("User [" + groupsUser +
+              "] is authorized to do doAs for users in the following groups: []");
+          }
+        }
+        proxyUserGroups.put(groupsUser, groups);
+      }
+      else if(hostsUser != null) {
+        //process hosts from which doAs requests are authorized
+        Set<String> hosts;
+        if("*".equals(value)) {
+          hosts = WILD_CARD;
+          if(LOG.isDebugEnabled()) {
+            LOG.debug("User [" + hostsUser + "] is authorized to do doAs from any host.");
+          }
+        }
+        else if(value != null && value.trim().length() > 0) {
+          hosts = new HashSet<String>();
+          // Empty entries are dropped by splitTrimmed: an empty host name would
+          // otherwise resolve to the loopback address and authorize localhost.
+          for(String hostname : splitTrimmed(value)) {
+            String nhn = normalizeHostname(hostname);
+            if(nhn != null) {
+              hosts.add(nhn);
+            }
+          }
+          if(LOG.isDebugEnabled()) {
+            LOG.debug("User [" + hostsUser +
+              "] is authorized to do doAs from the following hosts: [" +
+              value.trim() + "]");
+          }
+        }
+        else {
+          hosts = Collections.emptySet();
+          if(LOG.isDebugEnabled()) {
+            LOG.debug("User [" + hostsUser +
+              "] is authorized to do doAs from the following hosts: []");
+          }
+        }
+        proxyUserHosts.put(hostsUser, hosts);
+      }
+    }
+  }
+
+  /**
+   * Extracts #USER# from a key of the form webhcat.proxyuser.#USER#[suffix].
+   *
+   * @param key configuration key.
+   * @param suffix expected key suffix.
+   * @return the user name, or null if the key has a different form or names no user.
+   */
+  private static String extractProxyUser(String key, String suffix) {
+    // The length check also rejects keys where prefix and suffix overlap.
+    int minLength = CONF_PROXYUSER_PREFIX.length() + suffix.length();
+    if(key.length() > minLength && key.startsWith(CONF_PROXYUSER_PREFIX)
+      && key.endsWith(suffix)) {
+      return key.substring(CONF_PROXYUSER_PREFIX.length(), key.length() - suffix.length());
+    }
+    return null;
+  }
+
+  /**
+   * Splits a comma separated list, trimming each entry and dropping empty ones.
+   *
+   * @param value comma separated list, not null.
+   * @return the distinct non-empty entries.
+   */
+  private static Set<String> splitTrimmed(String value) {
+    Set<String> entries = new HashSet<String>();
+    for(String entry : value.split(",")) {
+      String trimmed = entry.trim();
+      if(trimmed.length() > 0) {
+        entries.add(trimmed);
+      }
+    }
+    return entries;
+  }
+
+  /**
+   * Verifies that proxyUser is making the request from authorized host and that doAs user
+   * belongs to one of the groups for which proxyUser is allowed to impersonate users.
+   *
+   * @param proxyUser user name of the proxy (logged in) user.
+   * @param proxyHost host the proxy user is making the request from.
+   * @param doAsUser user the proxy user is impersonating.
+   * @throws NotAuthorizedException thrown if the user is not allowed to perform the proxyuser request.
+   */
+  static void validate(String proxyUser, String proxyHost, String doAsUser) throws
+    NotAuthorizedException {
+    assertNotEmpty(proxyUser, "proxyUser",
+      "If you're attempting to use user-impersonation via a proxy user, please make sure that " +
+        CONF_PROXYUSER_PREFIX + "#USER#" + CONF_HOSTS_SUFFIX + " and " +
+        CONF_PROXYUSER_PREFIX + "#USER#" + CONF_GROUPS_SUFFIX +
+        " are configured correctly");
+    assertNotEmpty(proxyHost, "proxyHost",
+      "If you're attempting to use user-impersonation via a proxy user, please make sure that " +
+        CONF_PROXYUSER_PREFIX + proxyUser + CONF_HOSTS_SUFFIX + " and " +
+        CONF_PROXYUSER_PREFIX + proxyUser + CONF_GROUPS_SUFFIX +
+        " are configured correctly");
+    assertNotEmpty(doAsUser, Server.DO_AS_PARAM);
+    if(LOG.isDebugEnabled()) {
+      LOG.debug(MessageFormat.format("Authorization check proxyuser [{0}] host [{1}] doAs [{2}]",
+        proxyUser, proxyHost, doAsUser));
+    }
+    if (proxyUserHosts.containsKey(proxyUser)) {
+      proxyHost = normalizeHostname(proxyHost);
+      validateRequestorHost(proxyUser, proxyHost);
+      validateGroup(proxyUser, doAsUser);
+    }
+    else {
+      throw new NotAuthorizedException(MessageFormat.format(
+        "User [{0}] not defined as proxyuser", proxyUser));
+    }
+  }
+
+  /** Throws unless hostname is among the hosts recorded for proxyUser (or any host is allowed). */
+  private static void validateRequestorHost(String proxyUser, String hostname) throws
+    NotAuthorizedException {
+    Set<String> validHosts = proxyUserHosts.get(proxyUser);
+    if (validHosts == WILD_CARD) {
+      return;
+    }
+    if (validHosts == null || !validHosts.contains(hostname)) {
+      throw new NotAuthorizedException(MessageFormat.format(
+        "Unauthorized host [{0}] for proxyuser [{1}]", hostname, proxyUser));
+    }
+  }
+
+  /** Throws unless doAsUser belongs to a group recorded for proxyUser (or any group is allowed). */
+  private static void validateGroup(String proxyUser, String doAsUser) throws
+    NotAuthorizedException {
+    Set<String> validGroups = proxyUserGroups.get(proxyUser);
+    if(validGroups == WILD_CARD) {
+      return;
+    }
+    else if(validGroups == null || validGroups.isEmpty()) {
+      throw new NotAuthorizedException(
+        MessageFormat.format(
+          "Unauthorized proxyuser [{0}] for doAsUser [{1}], not in proxyuser groups",
+          proxyUser, doAsUser));
+    }
+    Groups groupsInfo = new Groups(Main.getAppConfigInstance());
+    try {
+      List<String> userGroups = groupsInfo.getGroups(doAsUser);
+      for (String g : validGroups) {
+        if (userGroups.contains(g)) {
+          return;
+        }
+      }
+    }
+    catch (IOException ex) {//thrown, for example, if there is no such user on the system
+      LOG.warn(MessageFormat.format("Unable to get list of groups for doAsUser [{0}].",
+        doAsUser), ex);
+    }
+    throw new NotAuthorizedException(
+      MessageFormat.format(
+        "Unauthorized proxyuser [{0}] for doAsUser [{1}], not in proxyuser groups",
+        proxyUser, doAsUser));
+  }
+
+  /** Returns the canonical host name for name, or null if it cannot be resolved. */
+  private static String normalizeHostname(String name) {
+    try {
+      InetAddress address = InetAddress.getByName(
+        "localhost".equalsIgnoreCase(name) ? null : name);
+      return address.getCanonicalHostName();
+    }
+    catch (UnknownHostException ex) {
+      LOG.warn(MessageFormat.format("Unable to normalize hostname [{0}]", name));
+      return null;
+    }
+  }
+  /**
+   * Check that a string is not null and not empty. If null or empty
+   * throws an IllegalArgumentException.
+   *
+   * @param str value.
+   * @param name parameter name for the exception message.
+   * @return the given value.
+   */
+  private static String assertNotEmpty(String str, String name) {
+    return assertNotEmpty(str, name, null);
+  }
+
+  /**
+   * Check that a string is not null and not empty. If null or empty
+   * throws an IllegalArgumentException.
+   *
+   * @param str value.
+   * @param name parameter name for the exception message.
+   * @param info additional information to be printed with the exception message
+   * @return the given value.
+   */
+  private static String assertNotEmpty(String str, String name, String info) {
+    if (str == null) {
+      throw new IllegalArgumentException(
+        name + " cannot be null" + (info == null ? "" : ", " + info));
+    }
+    if (str.length() == 0) {
+      throw new IllegalArgumentException(
+        name + " cannot be empty" + (info == null ? "" : ", " + info));
+    }
+    return str;
+  }
+}
diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/QueueException.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/QueueException.java
new file mode 100644
index 0000000..a35be92
--- /dev/null
+++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/QueueException.java
@@ -0,0 +1,31 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hive.hcatalog.templeton;
+
+import org.eclipse.jetty.http.HttpStatus;
+
+/**
+ * Unable to queue the job
+ */
+public class QueueException extends SimpleWebException {
+  public QueueException(String msg) {
+    super(HttpStatus.INTERNAL_SERVER_ERROR_500, msg);
+  }
+
+}
diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/QueueStatusBean.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/QueueStatusBean.java
new file mode 100644
index 0000000..6e9ded8
--- /dev/null
+++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/QueueStatusBean.java
@@ -0,0 +1,67 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.templeton; + +import java.io.IOException; + +import org.apache.hadoop.mapred.JobStatus; +import org.apache.hadoop.mapred.JobProfile; +import org.apache.hive.hcatalog.templeton.tool.JobState; + +/** + * QueueStatusBean - The results of an exec call. + */ +public class QueueStatusBean { + public JobStatus status; + public JobProfile profile; + + public String id; + public String parentId; + public String percentComplete; + public Long exitValue; + public String user; + public String callback; + public String completed; + + public QueueStatusBean() { + } + + /** + * Create a new QueueStatusBean + * + * @param state store job state + * @param status job status + * @param profile job profile + */ + public QueueStatusBean(JobState state, JobStatus status, JobProfile profile) + throws IOException { + this.status = status; + this.profile = profile; + + id = profile.getJobID().toString(); + parentId = state.getId(); + if (id.equals(parentId)) + parentId = null; + percentComplete = state.getPercentComplete(); + exitValue = state.getExitValue(); + user = state.getUser(); + callback = state.getCallback(); + completed = state.getCompleteStatus(); + } +} diff --git 
hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/SecureProxySupport.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/SecureProxySupport.java
new file mode 100644
index 0000000..8dbcf9a
--- /dev/null
+++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/SecureProxySupport.java
@@ -0,0 +1,214 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hive.hcatalog.templeton;
+
+import java.io.File;
+import java.io.IOException;
+import java.security.PrivilegedExceptionAction;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
+import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.io.Text;
+import org.apache.thrift.TException;
+import org.apache.hadoop.security.Credentials;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.security.token.Token;
+
+/**
+ * Helper class to run jobs using Kerberos security. Always safe to
+ * use these methods, it's a noop if security is not enabled.
+ */
+public class SecureProxySupport {
+  private Path tokenPath;
+  private final String HCAT_SERVICE = "hcat";
+  private boolean isEnabled;
+  private String user;
+
+  public SecureProxySupport() {
+    isEnabled = UserGroupInformation.isSecurityEnabled();
+  }
+
+  private static final Log LOG = LogFactory.getLog(SecureProxySupport.class);
+
+  /**
+   * The file where we store the auth token
+   */
+  public Path getTokenPath() {
+    return (tokenPath);
+  }
+
+  /**
+   * The token to pass to hcat.
+   */
+  public String getHcatServiceStr() {
+    return (HCAT_SERVICE);
+  }
+
+  /**
+   * Create the delegation tokens for the given user and store them in a
+   * fresh temporary file. Any token file left over from an earlier call
+   * is deleted first. Does nothing when security is not enabled.
+   *
+   * @param user the user the tokens are requested for
+   * @param conf configuration used to reach the file system
+   * @return the token file, or null when security is not enabled
+   */
+  public Path open(String user, Configuration conf)
+    throws IOException, InterruptedException {
+    close();
+    if (isEnabled) {
+      this.user = user;
+      File t = File.createTempFile("templeton", null);
+      tokenPath = new Path(t.toURI());
+      Token fsToken = getFSDelegationToken(user, conf);
+      String hcatTokenStr;
+      try {
+        hcatTokenStr = buildHcatDelegationToken(user);
+      } catch (Exception e) {
+        throw new IOException(e);
+      }
+      Token msToken = new Token();
+      msToken.decodeFromUrlString(hcatTokenStr);
+      msToken.setService(new Text(HCAT_SERVICE));
+      writeProxyDelegationTokens(fsToken, msToken, conf, user, tokenPath);
+
+    }
+    return tokenPath;
+  }
+
+  /**
+   * Cleanup: delete the token file, if one was created.
+   */
+  public void close() {
+    if (tokenPath != null) {
+      File tokenFile = new File(tokenPath.toUri());
+      if (!tokenFile.delete() && tokenFile.exists()) {
+        LOG.warn("Unable to delete token file " + tokenFile);
+      }
+      tokenPath = null;
+    }
+  }
+
+  /**
+   * Add Hadoop env variables.
+   */
+  public void addEnv(Map env) {
+    if (isEnabled) {
+      env.put(UserGroupInformation.HADOOP_TOKEN_FILE_LOCATION,
+        getTokenPath().toUri().getPath());
+    }
+  }
+
+  /**
+   * Add hcat args.
+   */
+  public void addArgs(List args) {
+    if (isEnabled) {
+      args.add("-D");
+      args.add("hive.metastore.token.signature=" + getHcatServiceStr());
+      args.add("-D");
+      args.add("proxy.user.name=" + user);
+    }
+  }
+
+  /** Mutable holder that lets an anonymous action hand a token back to its caller. */
+  class TokenWrapper {
+    Token token;
+  }
+
+  /**
+   * Fetch a file system delegation token while running as the given user.
+   */
+  private Token getFSDelegationToken(String user,
+                     final Configuration conf)
+    throws IOException, InterruptedException {
+    LOG.info("user: " + user + " loginUser: " + UserGroupInformation.getLoginUser().getUserName());
+    final UserGroupInformation ugi = UgiFactory.getUgi(user);
+
+    final TokenWrapper twrapper = new TokenWrapper();
+    ugi.doAs(new PrivilegedExceptionAction<Object>() {
+      public Object run() throws IOException {
+        FileSystem fs = FileSystem.get(conf);
+        twrapper.token = fs.getDelegationToken(ugi.getShortUserName());
+        return null;
+      }
+    });
+    return twrapper.token;
+
+  }
+
+  /**
+   * Write both tokens to tokenPath while running as the given user.
+   */
+  private void writeProxyDelegationTokens(final Token fsToken,
+                      final Token msToken,
+                      final Configuration conf,
+                      String user,
+                      final Path tokenPath)
+    throws IOException, InterruptedException {
+
+
+    LOG.info("user: " + user + " loginUser: " + UserGroupInformation.getLoginUser().getUserName());
+    final UserGroupInformation ugi = UgiFactory.getUgi(user);
+
+
+    ugi.doAs(new PrivilegedExceptionAction<Object>() {
+      public Object run() throws IOException {
+        Credentials cred = new Credentials();
+        cred.addToken(fsToken.getService(), fsToken);
+        cred.addToken(msToken.getService(), msToken);
+        cred.writeTokenStorageFile(tokenPath, conf);
+        return null;
+      }
+    });
+
+  }
+
+  /**
+   * Ask the metastore for a delegation token while running as the given
+   * user. The metastore client is always closed before returning.
+   */
+  private String buildHcatDelegationToken(String user)
+    throws IOException, InterruptedException, MetaException, TException {
+    HiveConf c = new HiveConf();
+    final HiveMetaStoreClient client = new HiveMetaStoreClient(c);
+    try {
+      LOG.info("user: " + user + " loginUser: " + UserGroupInformation.getLoginUser().getUserName());
+      final UserGroupInformation ugi = UgiFactory.getUgi(user);
+      return ugi.doAs(new PrivilegedExceptionAction<String>() {
+        public String run()
+          throws IOException, MetaException, TException {
+          String u = ugi.getUserName();
+          return client.getDelegationToken(u);
+        }
+      });
+    } finally {
+      // Release the metastore connection on success and on failure alike.
+      client.close();
+    }
+  }
+}
diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/Server.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/Server.java
new file mode 100644
index 0000000..fefb259
--- /dev/null
+++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/Server.java
@@ -0,0 +1,856 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */ +package org.apache.hive.hcatalog.templeton; + +import java.io.IOException; +import java.net.InetAddress; +import java.net.UnknownHostException; +import java.text.MessageFormat; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import javax.servlet.http.HttpServletRequest; +import javax.ws.rs.DELETE; +import javax.ws.rs.FormParam; +import javax.ws.rs.GET; +import javax.ws.rs.POST; +import javax.ws.rs.PUT; +import javax.ws.rs.Path; +import javax.ws.rs.PathParam; +import javax.ws.rs.Produces; +import javax.ws.rs.QueryParam; +import javax.ws.rs.core.Context; +import javax.ws.rs.core.MediaType; +import javax.ws.rs.core.Response; +import javax.ws.rs.core.SecurityContext; +import javax.ws.rs.core.UriInfo; + +import org.apache.commons.exec.ExecuteException; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.security.authentication.client.PseudoAuthenticator; +import org.apache.hive.hcatalog.templeton.tool.TempletonUtils; + +/** + * The Templeton Web API server. + */ +@Path("/v1") +public class Server { + public static final String VERSION = "v1"; + public static final String DO_AS_PARAM = "doAs"; + + /** + * The status message. Always "ok" + */ + public static final Map STATUS_OK = createStatusMsg(); + + /** + * The list of supported api versions. + */ + public static final Map SUPPORTED_VERSIONS = createVersions(); + + /** + * The list of supported return formats. Always json. + */ + public static final Map SUPPORTED_FORMATS = createFormats(); + + // Build the status message for the /status call. 
+ private static Map createStatusMsg() { + HashMap res = new HashMap(); + res.put("status", "ok"); + res.put("version", VERSION); + + return Collections.unmodifiableMap(res); + } + + // Build the versions list. + private static Map createVersions() { + ArrayList versions = new ArrayList(); + versions.add(VERSION); + + HashMap res = new HashMap(); + res.put("supportedVersions", versions); + res.put("version", VERSION); + + return Collections.unmodifiableMap(res); + } + + // Build the supported formats list + private static Map createFormats() { + ArrayList formats = new ArrayList(); + formats.add(MediaType.APPLICATION_JSON); + HashMap res = new HashMap(); + res.put("responseTypes", formats); + + return Collections.unmodifiableMap(res); + } + + protected static ExecService execService = ExecServiceImpl.getInstance(); + private static AppConfig appConf = Main.getAppConfigInstance(); + + // The SecurityContext set by AuthFilter + private + @Context + SecurityContext theSecurityContext; + + // The uri requested + private + @Context + UriInfo theUriInfo; + private @QueryParam(DO_AS_PARAM) String doAs; + private @Context HttpServletRequest request; + + private static final Log LOG = LogFactory.getLog(Server.class); + + /** + * Check the status of this server. Always OK. + */ + @GET + @Path("status") + @Produces({MediaType.APPLICATION_JSON}) + public Map status() { + return STATUS_OK; + } + + /** + * Check the supported request formats of this server. + */ + @GET + @Produces({MediaType.APPLICATION_JSON}) + public Map requestFormats() { + return SUPPORTED_FORMATS; + } + + /** + * Check the version(s) supported by this server. + */ + @GET + @Path("version") + @Produces({MediaType.APPLICATION_JSON}) + public Map version() { + return SUPPORTED_VERSIONS; + } + + /** + * Execute an hcat ddl expression on the local box. It is run + * as the authenticated user and rate limited. 
+   */
+  @POST
+  @Path("ddl")
+  @Produces({MediaType.APPLICATION_JSON})
+  public ExecBean ddl(@FormParam("exec") String exec,
+                      @FormParam("group") String group,
+                      @FormParam("permissions") String permissions)
+    throws NotAuthorizedException, BusyException, BadParam,
+    ExecuteException, IOException {
+    verifyUser();
+    verifyParam(exec, "exec"); // only presence is checked; the statement text itself is not validated here
+
+    HcatDelegator d = new HcatDelegator(appConf, execService);
+    return d.run(getDoAsUser(), exec, false, group, permissions);
+  }
+
+  /**
+   * List all the tables in an hcat database.
+   * The optional "like" query parameter is a table pattern; when it is
+   * not set (per TempletonUtils.isset) the pattern "*" is used.
+   *
+   * @throws NotAuthorizedException if verifyUser() rejects the caller
+   * @throws BadParam if db is missing or not a simple DDL identifier
+   */
+  @GET
+  @Path("ddl/database/{db}/table")
+  @Produces(MediaType.APPLICATION_JSON)
+  public Response listTables(@PathParam("db") String db,
+                             @QueryParam("like") String tablePattern)
+    throws HcatException, NotAuthorizedException, BusyException,
+    BadParam, ExecuteException, IOException {
+    verifyUser();
+    verifyDdlParam(db, ":db");
+
+    HcatDelegator d = new HcatDelegator(appConf, execService);
+    if (!TempletonUtils.isset(tablePattern))
+      tablePattern = "*";
+    return d.listTables(getDoAsUser(), db, tablePattern);
+  }
+
+  /**
+   * Create a new table.
+   * The table name taken from the path is written into desc.table,
+   * replacing whatever the request body supplied there.
+   *
+   * @throws NotAuthorizedException if verifyUser() rejects the caller
+   * @throws BadParam if db or table is missing or not a simple DDL identifier
+   */
+  @PUT
+  @Path("ddl/database/{db}/table/{table}")
+  @Produces(MediaType.APPLICATION_JSON)
+  public Response createTable(@PathParam("db") String db,
+                              @PathParam("table") String table,
+                              TableDesc desc)
+    throws SimpleWebException, NotAuthorizedException, BusyException,
+    BadParam, ExecuteException, IOException {
+    verifyUser();
+    verifyDdlParam(db, ":db");
+    verifyDdlParam(table, ":table");
+    desc.table = table; // NOTE(review): desc is dereferenced without a null check - confirm a request body is always present
+
+    HcatDelegator d = new HcatDelegator(appConf, execService);
+    return d.createTable(getDoAsUser(), db, desc);
+  }
+
+  /**
+   * Create a new table like another table.
+   */
+  @PUT
+  @Path("ddl/database/{db}/table/{existingTable}/like/{newTable}")
+  @Produces(MediaType.APPLICATION_JSON)
+  public Response createTableLike(@PathParam("db") String db,
+                                  @PathParam("existingTable") String existingTable,
+                                  @PathParam("newTable") String newTable,
+                                  TableLikeDesc desc)
+    throws SimpleWebException, NotAuthorizedException, BusyException,
+    BadParam, ExecuteException, IOException {
+    verifyUser();
+    verifyDdlParam(db, ":db");
+    verifyDdlParam(existingTable, ":existingTable");
+    verifyDdlParam(newTable, ":newTable");
+    desc.existingTable = existingTable; // path values replace whatever the body carried
+    desc.newTable = newTable;
+
+    HcatDelegator d = new HcatDelegator(appConf, execService);
+    return d.createTableLike(getDoAsUser(), db, desc);
+  }
+
+  /**
+   * Describe an hcat table.  This is normally a simple list of
+   * columns (using "desc table"), but the extended format will show
+   * more information (using "show table extended like").
+   *
+   * The extended form is selected only when the "format" query
+   * parameter equals "extended" exactly; any other value, or none,
+   * gives the simple description.
+   *
+   * @throws NotAuthorizedException if verifyUser() rejects the caller
+   * @throws BadParam if db or table is missing or not a simple DDL identifier
+   */
+  @GET
+  @Path("ddl/database/{db}/table/{table}")
+  @Produces(MediaType.APPLICATION_JSON)
+  public Response descTable(@PathParam("db") String db,
+                            @PathParam("table") String table,
+                            @QueryParam("format") String format)
+    throws HcatException, NotAuthorizedException, BusyException,
+    BadParam, ExecuteException, IOException {
+    verifyUser();
+    verifyDdlParam(db, ":db");
+    verifyDdlParam(table, ":table");
+
+    HcatDelegator d = new HcatDelegator(appConf, execService);
+    if ("extended".equals(format)) // constant-first equals, so a null format is safe
+      return d.descExtendedTable(getDoAsUser(), db, table);
+    else
+      return d.descTable(getDoAsUser(), db, table, false);
+  }
+
+  /**
+   * Drop an hcat table.
+   */
+  @DELETE
+  @Path("ddl/database/{db}/table/{table}")
+  @Produces(MediaType.APPLICATION_JSON)
+  public Response dropTable(@PathParam("db") String db,
+                            @PathParam("table") String table,
+                            @QueryParam("ifExists") boolean ifExists,
+                            @QueryParam("group") String group,
+                            @QueryParam("permissions") String permissions)
+    throws HcatException, NotAuthorizedException, BusyException,
+    BadParam, ExecuteException, IOException {
+    verifyUser();
+    verifyDdlParam(db, ":db");
+    verifyDdlParam(table, ":table");
+
+    HcatDelegator d = new HcatDelegator(appConf, execService);
+    return d.dropTable(getDoAsUser(), db, table, ifExists, group, permissions); // group/permissions are passed through unchecked
+  }
+
+  /**
+   * Rename an hcat table.
+   * The current name comes from the path, the new name from the
+   * "rename" form parameter; both must be simple DDL identifiers.
+   *
+   * @throws NotAuthorizedException if verifyUser() rejects the caller
+   * @throws BadParam if db, the old name or the new name is missing or
+   *         not a simple DDL identifier
+   */
+  @POST
+  @Path("ddl/database/{db}/table/{table}")
+  @Produces(MediaType.APPLICATION_JSON)
+  public Response renameTable(@PathParam("db") String db,
+                              @PathParam("table") String oldTable,
+                              @FormParam("rename") String newTable,
+                              @FormParam("group") String group,
+                              @FormParam("permissions") String permissions)
+    throws HcatException, NotAuthorizedException, BusyException,
+    BadParam, ExecuteException, IOException {
+    verifyUser();
+    verifyDdlParam(db, ":db");
+    verifyDdlParam(oldTable, ":table");
+    verifyDdlParam(newTable, "rename");
+
+    HcatDelegator d = new HcatDelegator(appConf, execService);
+    return d.renameTable(getDoAsUser(), db, oldTable, newTable, group, permissions);
+  }
+
+  /**
+   * Describe a single property on an hcat table.
+   */
+  @GET
+  @Path("ddl/database/{db}/table/{table}/property/{property}")
+  @Produces(MediaType.APPLICATION_JSON)
+  public Response descOneTableProperty(@PathParam("db") String db,
+                                       @PathParam("table") String table,
+                                       @PathParam("property") String property)
+    throws HcatException, NotAuthorizedException, BusyException,
+    BadParam, ExecuteException, IOException {
+    verifyUser();
+    verifyDdlParam(db, ":db");
+    verifyDdlParam(table, ":table");
+    verifyDdlParam(property, ":property"); // property names are held to the same identifier rule as db/table
+
+    HcatDelegator d = new HcatDelegator(appConf, execService);
+    return d.descTableProperty(getDoAsUser(), db, table, property);
+  }
+
+  /**
+   * List all the properties on an hcat table.
+   *
+   * @throws NotAuthorizedException if verifyUser() rejects the caller
+   * @throws BadParam if db or table is missing or not a simple DDL identifier
+   */
+  @GET
+  @Path("ddl/database/{db}/table/{table}/property")
+  @Produces(MediaType.APPLICATION_JSON)
+  public Response listTableProperties(@PathParam("db") String db,
+                                      @PathParam("table") String table)
+    throws HcatException, NotAuthorizedException, BusyException,
+    BadParam, ExecuteException, IOException {
+    verifyUser();
+    verifyDdlParam(db, ":db");
+    verifyDdlParam(table, ":table");
+
+    HcatDelegator d = new HcatDelegator(appConf, execService);
+    return d.listTableProperties(getDoAsUser(), db, table);
+  }
+
+  /**
+   * Add a single property on an hcat table.
+   * The property name taken from the path is written into desc.name,
+   * replacing whatever the request body supplied there.
+   *
+   * @throws NotAuthorizedException if verifyUser() rejects the caller
+   * @throws BadParam if db, table or property is missing or not a
+   *         simple DDL identifier
+   */
+  @PUT
+  @Path("ddl/database/{db}/table/{table}/property/{property}")
+  @Produces(MediaType.APPLICATION_JSON)
+  public Response addOneTableProperty(@PathParam("db") String db,
+                                      @PathParam("table") String table,
+                                      @PathParam("property") String property,
+                                      TablePropertyDesc desc)
+    throws HcatException, NotAuthorizedException, BusyException,
+    BadParam, ExecuteException, IOException {
+    verifyUser();
+    verifyDdlParam(db, ":db");
+    verifyDdlParam(table, ":table");
+    verifyDdlParam(property, ":property");
+    desc.name = property; // NOTE(review): desc is dereferenced without a null check - confirm a request body is always present
+
+    HcatDelegator d = new HcatDelegator(appConf, execService);
+    return d.addOneTableProperty(getDoAsUser(), db, table, desc);
+  }
+
+  /**
+   * List all the partitions in an hcat table.
+   */
+  @GET
+  @Path("ddl/database/{db}/table/{table}/partition")
+  @Produces(MediaType.APPLICATION_JSON)
+  public Response listPartitions(@PathParam("db") String db,
+                                 @PathParam("table") String table)
+    throws HcatException, NotAuthorizedException, BusyException,
+    BadParam, ExecuteException, IOException {
+    verifyUser();
+    verifyDdlParam(db, ":db");
+    verifyDdlParam(table, ":table");
+
+    HcatDelegator d = new HcatDelegator(appConf, execService);
+    return d.listPartitions(getDoAsUser(), db, table);
+  }
+
+  /**
+   * Describe a single partition in an hcat table.
+   * The partition path segment is only checked for presence
+   * (verifyParam), not against the DDL identifier pattern.
+   *
+   * @throws NotAuthorizedException if verifyUser() rejects the caller
+   * @throws BadParam if db or table is not a simple DDL identifier, or
+   *         partition is missing
+   */
+  @GET
+  @Path("ddl/database/{db}/table/{table}/partition/{partition}")
+  @Produces(MediaType.APPLICATION_JSON)
+  public Response descPartition(@PathParam("db") String db,
+                                @PathParam("table") String table,
+                                @PathParam("partition") String partition)
+    throws HcatException, NotAuthorizedException, BusyException,
+    BadParam, ExecuteException, IOException {
+    verifyUser();
+    verifyDdlParam(db, ":db");
+    verifyDdlParam(table, ":table");
+    verifyParam(partition, ":partition");
+
+    HcatDelegator d = new HcatDelegator(appConf, execService);
+    return d.descOnePartition(getDoAsUser(), db, table, partition);
+  }
+
+  /**
+   * Create a partition in an hcat table.
+   * The partition taken from the path is written into desc.partition,
+   * replacing whatever the request body supplied there.
+   *
+   * @throws NotAuthorizedException if verifyUser() rejects the caller
+   * @throws BadParam if db or table is not a simple DDL identifier, or
+   *         partition is missing
+   */
+  @PUT
+  @Path("ddl/database/{db}/table/{table}/partition/{partition}")
+  @Produces(MediaType.APPLICATION_JSON)
+  public Response addOnePartition(@PathParam("db") String db,
+                                  @PathParam("table") String table,
+                                  @PathParam("partition") String partition,
+                                  PartitionDesc desc)
+    throws HcatException, NotAuthorizedException, BusyException,
+    BadParam, ExecuteException, IOException {
+    verifyUser();
+    verifyDdlParam(db, ":db");
+    verifyDdlParam(table, ":table");
+    verifyParam(partition, ":partition");
+    desc.partition = partition; // NOTE(review): desc is dereferenced without a null check - confirm a request body is always present
+    HcatDelegator d = new HcatDelegator(appConf, execService);
+    return d.addOnePartition(getDoAsUser(), db, table, desc);
+  }
+
+  /**
+   * Drop a partition in an hcat table.
+   */
+  @DELETE
+  @Path("ddl/database/{db}/table/{table}/partition/{partition}")
+  @Produces(MediaType.APPLICATION_JSON)
+  public Response dropPartition(@PathParam("db") String db,
+                                @PathParam("table") String table,
+                                @PathParam("partition") String partition,
+                                @QueryParam("ifExists") boolean ifExists,
+                                @QueryParam("group") String group,
+                                @QueryParam("permissions") String permissions)
+    throws HcatException, NotAuthorizedException, BusyException,
+    BadParam, ExecuteException, IOException {
+    verifyUser();
+    verifyDdlParam(db, ":db");
+    verifyDdlParam(table, ":table");
+    verifyParam(partition, ":partition"); // presence check only, as in descPartition/addOnePartition
+    HcatDelegator d = new HcatDelegator(appConf, execService);
+    return d.dropPartition(getDoAsUser(), db, table, partition, ifExists,
+      group, permissions);
+  }
+
+  /**
+   * List all databases, or those that match a pattern.
+   * When the "like" query parameter is not set (per
+   * TempletonUtils.isset) the pattern "*" is used.
+   *
+   * @throws NotAuthorizedException if verifyUser() rejects the caller
+   */
+  @GET
+  @Path("ddl/database/")
+  @Produces(MediaType.APPLICATION_JSON)
+  public Response listDatabases(@QueryParam("like") String dbPattern)
+    throws HcatException, NotAuthorizedException, BusyException,
+    BadParam, ExecuteException, IOException {
+    verifyUser();
+
+    HcatDelegator d = new HcatDelegator(appConf, execService);
+    if (!TempletonUtils.isset(dbPattern))
+      dbPattern = "*";
+    return d.listDatabases(getDoAsUser(), dbPattern);
+  }
+
+  /**
+   * Describe a database
+   * The delegator is asked for the extended form only when the
+   * "format" query parameter equals "extended" exactly.
+   *
+   * @throws NotAuthorizedException if verifyUser() rejects the caller
+   * @throws BadParam if db is missing or not a simple DDL identifier
+   */
+  @GET
+  @Path("ddl/database/{db}")
+  @Produces(MediaType.APPLICATION_JSON)
+  public Response descDatabase(@PathParam("db") String db,
+                               @QueryParam("format") String format)
+    throws HcatException, NotAuthorizedException, BusyException,
+    BadParam, ExecuteException, IOException {
+    verifyUser();
+    verifyDdlParam(db, ":db");
+    HcatDelegator d = new HcatDelegator(appConf, execService);
+    return d.descDatabase(getDoAsUser(), db, "extended".equals(format));
+  }
+
+  /**
+   * Create a database
+   * The database name taken from the path is written into
+   * desc.database, replacing whatever the request body supplied there.
+   *
+   * @throws NotAuthorizedException if verifyUser() rejects the caller
+   * @throws BadParam if db is missing or not a simple DDL identifier
+   */
+  @PUT
+  @Path("ddl/database/{db}")
+  @Produces(MediaType.APPLICATION_JSON)
+  public Response createDatabase(@PathParam("db") String db,
+                                 DatabaseDesc desc)
+    throws HcatException, NotAuthorizedException, BusyException,
+    BadParam, ExecuteException, IOException {
+    verifyUser();
+    verifyDdlParam(db, ":db");
+    desc.database = db; // NOTE(review): desc is dereferenced without a null check - confirm a request body is always present
+    HcatDelegator d = new HcatDelegator(appConf, execService);
+    return d.createDatabase(getDoAsUser(), desc);
+  }
+
+  /**
+   * Drop a database
+   * The "option" query parameter is optional; when it is set it must
+   * itself be a simple DDL identifier before it is passed on.
+   *
+   * @throws NotAuthorizedException if verifyUser() rejects the caller
+   * @throws BadParam if db (or a supplied option) is not a simple DDL
+   *         identifier
+   */
+  @DELETE
+  @Path("ddl/database/{db}")
+  @Produces(MediaType.APPLICATION_JSON)
+  public Response dropDatabase(@PathParam("db") String db,
+                               @QueryParam("ifExists") boolean ifExists,
+                               @QueryParam("option") String option,
+                               @QueryParam("group") String group,
+                               @QueryParam("permissions") String permissions)
+    throws HcatException, NotAuthorizedException, BusyException,
+    BadParam, ExecuteException, IOException {
+    verifyUser();
+    verifyDdlParam(db, ":db");
+    if (TempletonUtils.isset(option))
+      verifyDdlParam(option, "option");
+    HcatDelegator d = new HcatDelegator(appConf, execService);
+    return d.dropDatabase(getDoAsUser(), db, ifExists, option,
+      group, permissions);
+  }
+
+  /**
+   * List the columns in an hcat table.  Currently the same as
+   * describe table.
+   *
+   * @throws NotAuthorizedException if verifyUser() rejects the caller
+   * @throws BadParam if db or table is missing or not a simple DDL identifier
+   */
+  @GET
+  @Path("ddl/database/{db}/table/{table}/column")
+  @Produces(MediaType.APPLICATION_JSON)
+  public Response listColumns(@PathParam("db") String db,
+                              @PathParam("table") String table)
+    throws HcatException, NotAuthorizedException, BusyException,
+    BadParam, ExecuteException, IOException {
+    verifyUser();
+    verifyDdlParam(db, ":db");
+    verifyDdlParam(table, ":table");
+
+    HcatDelegator d = new HcatDelegator(appConf, execService);
+    return d.listColumns(getDoAsUser(), db, table);
+  }
+
+  /**
+   * Describe a single column in an hcat table.
+   */
+  @GET
+  @Path("ddl/database/{db}/table/{table}/column/{column}")
+  @Produces(MediaType.APPLICATION_JSON)
+  public Response descColumn(@PathParam("db") String db,
+                             @PathParam("table") String table,
+                             @PathParam("column") String column)
+    throws SimpleWebException, NotAuthorizedException, BusyException,
+    BadParam, ExecuteException, IOException {
+    verifyUser();
+    verifyDdlParam(db, ":db");
+    verifyDdlParam(table, ":table");
+    verifyParam(column, ":column"); // presence check only; not matched against DDL_ID
+
+    HcatDelegator d = new HcatDelegator(appConf, execService);
+    return d.descOneColumn(getDoAsUser(), db, table, column);
+  }
+
+  /**
+   * Create a column in an hcat table.
+   * The request body must carry a column type (desc.type); the column
+   * name taken from the path is written into desc.name.
+   *
+   * @throws NotAuthorizedException if verifyUser() rejects the caller
+   * @throws BadParam if db or table is not a simple DDL identifier, or
+   *         column or desc.type is missing
+   */
+  @PUT
+  @Path("ddl/database/{db}/table/{table}/column/{column}")
+  @Produces(MediaType.APPLICATION_JSON)
+  public Response addOneColumn(@PathParam("db") String db,
+                               @PathParam("table") String table,
+                               @PathParam("column") String column,
+                               ColumnDesc desc)
+    throws HcatException, NotAuthorizedException, BusyException,
+    BadParam, ExecuteException, IOException {
+    verifyUser();
+    verifyDdlParam(db, ":db");
+    verifyDdlParam(table, ":table");
+    verifyParam(column, ":column");
+    verifyParam(desc.type, "type"); // NOTE(review): desc is dereferenced without a null check - confirm a request body is always present
+    desc.name = column;
+
+    HcatDelegator d = new HcatDelegator(appConf, execService);
+    return d.addOneColumn(getDoAsUser(), db, table, desc);
+  }
+
+  /**
+   * Run a MapReduce Streaming job.
+   */
+  @POST
+  @Path("mapreduce/streaming")
+  @Produces({MediaType.APPLICATION_JSON})
+  public EnqueueBean mapReduceStreaming(@FormParam("input") List inputs,
+                                        @FormParam("output") String output,
+                                        @FormParam("mapper") String mapper,
+                                        @FormParam("reducer") String reducer,
+                                        @FormParam("file") List files,
+                                        @FormParam("define") List defines,
+                                        @FormParam("cmdenv") List cmdenvs,
+                                        @FormParam("arg") List args,
+                                        @FormParam("statusdir") String statusdir,
+                                        @FormParam("callback") String callback)
+    throws NotAuthorizedException, BusyException, BadParam, QueueException,
+    ExecuteException, IOException, InterruptedException {
+    verifyUser();
+    verifyParam(inputs, "input"); // list overload: rejects null and empty
+    verifyParam(mapper, "mapper");
+    verifyParam(reducer, "reducer"); // note: "output" is passed on without a presence check
+
+    StreamingDelegator d = new StreamingDelegator(appConf);
+    return d.run(getDoAsUser(), inputs, output, mapper, reducer,
+      files, defines, cmdenvs, args,
+      statusdir, callback, getCompletedUrl());
+  }
+
+  /**
+   * Run a MapReduce Jar job.
+   * Only the "jar" and "class" form parameters are required; the
+   * remaining parameters are passed to JarDelegator as received.
+   *
+   * @throws NotAuthorizedException if verifyUser() rejects the caller
+   * @throws BadParam if jar or class is missing
+   */
+  @POST
+  @Path("mapreduce/jar")
+  @Produces({MediaType.APPLICATION_JSON})
+  public EnqueueBean mapReduceJar(@FormParam("jar") String jar,
+                                  @FormParam("class") String mainClass,
+                                  @FormParam("libjars") String libjars,
+                                  @FormParam("files") String files,
+                                  @FormParam("arg") List args,
+                                  @FormParam("define") List defines,
+                                  @FormParam("statusdir") String statusdir,
+                                  @FormParam("callback") String callback)
+    throws NotAuthorizedException, BusyException, BadParam, QueueException,
+    ExecuteException, IOException, InterruptedException {
+    verifyUser();
+    verifyParam(jar, "jar");
+    verifyParam(mainClass, "class");
+
+    JarDelegator d = new JarDelegator(appConf);
+    return d.run(getDoAsUser(),
+      jar, mainClass,
+      libjars, files, args, defines,
+      statusdir, callback, getCompletedUrl());
+  }
+
+  /**
+   * Run a Pig job.
+   */
+  @POST
+  @Path("pig")
+  @Produces({MediaType.APPLICATION_JSON})
+  public EnqueueBean pig(@FormParam("execute") String execute,
+                         @FormParam("file") String srcFile,
+                         @FormParam("arg") List pigArgs,
+                         @FormParam("files") String otherFiles,
+                         @FormParam("statusdir") String statusdir,
+                         @FormParam("callback") String callback)
+    throws NotAuthorizedException, BusyException, BadParam, QueueException,
+    ExecuteException, IOException, InterruptedException {
+    verifyUser();
+    if (execute == null && srcFile == null) // at least one script source is required; both may be given
+      throw new BadParam("Either execute or file parameter required");
+
+    PigDelegator d = new PigDelegator(appConf);
+    return d.run(getDoAsUser(),
+      execute, srcFile,
+      pigArgs, otherFiles,
+      statusdir, callback, getCompletedUrl());
+  }
+
+  /**
+   * Run a Hive job.
+   * At least one of the "execute" and "file" form parameters must be
+   * present.
+   *
+   * @throws NotAuthorizedException if verifyUser() rejects the caller
+   * @throws BadParam if both execute and file are missing
+   */
+  @POST
+  @Path("hive")
+  @Produces({MediaType.APPLICATION_JSON})
+  public EnqueueBean hive(@FormParam("execute") String execute,
+                          @FormParam("file") String srcFile,
+                          @FormParam("define") List defines,
+                          @FormParam("statusdir") String statusdir,
+                          @FormParam("callback") String callback)
+    throws NotAuthorizedException, BusyException, BadParam, QueueException,
+    ExecuteException, IOException, InterruptedException {
+    verifyUser();
+    if (execute == null && srcFile == null)
+      throw new BadParam("Either execute or file parameter required");
+
+    HiveDelegator d = new HiveDelegator(appConf);
+    return d.run(getDoAsUser(), execute, srcFile, defines,
+      statusdir, callback, getCompletedUrl());
+  }
+
+  /**
+   * Return the status of the jobid.
+   *
+   * @throws NotAuthorizedException if verifyUser() rejects the caller
+   * @throws BadParam if jobid is missing
+   */
+  @GET
+  @Path("queue/{jobid}")
+  @Produces({MediaType.APPLICATION_JSON})
+  public QueueStatusBean showQueueId(@PathParam("jobid") String jobid)
+    throws NotAuthorizedException, BadParam, IOException, InterruptedException {
+
+    verifyUser();
+    verifyParam(jobid, ":jobid");
+
+    StatusDelegator d = new StatusDelegator(appConf);
+    return d.run(getDoAsUser(), jobid);
+  }
+
+  /**
+   * Kill a job in the queue.
+   */
+  @DELETE
+  @Path("queue/{jobid}")
+  @Produces({MediaType.APPLICATION_JSON})
+  public QueueStatusBean deleteQueueId(@PathParam("jobid") String jobid)
+    throws NotAuthorizedException, BadParam, IOException, InterruptedException {
+
+    verifyUser();
+    verifyParam(jobid, ":jobid"); // presence check only
+
+    DeleteDelegator d = new DeleteDelegator(appConf);
+    return d.run(getDoAsUser(), jobid);
+  }
+
+  /**
+   * Return all the known job ids for this user.
+   * The "showall" query parameter is forwarded to ListDelegator
+   * unchanged.
+   *
+   * @throws NotAuthorizedException if verifyUser() rejects the caller
+   */
+  @GET
+  @Path("queue")
+  @Produces({MediaType.APPLICATION_JSON})
+  public List showQueueList(@QueryParam("showall") boolean showall)
+    throws NotAuthorizedException, BadParam, IOException, InterruptedException {
+
+    verifyUser();
+
+    ListDelegator d = new ListDelegator(appConf);
+    return d.run(getDoAsUser(), showall);
+  }
+
+  /**
+   * Notify on a completed job.
+   * This is the path that getCompletedUrl() hands to the job
+   * delegators.  Unlike the other job endpoints it does not call
+   * verifyUser() and does not validate jobid.
+   */
+  @GET
+  @Path("internal/complete/{jobid}")
+  @Produces({MediaType.APPLICATION_JSON})
+  public CompleteBean completeJob(@PathParam("jobid") String jobid)
+    throws CallbackFailedException, IOException {
+    CompleteDelegator d = new CompleteDelegator(appConf);
+    return d.run(jobid);
+  }
+
+  /**
+   * Verify that we have a valid user.  Throw an exception if invalid.
+   *
+   * A request with no resolvable requesting user is rejected.  When a
+   * doAs value is present and differs from the requesting user, the
+   * pair (plus the requesting host) is handed to
+   * ProxyUserSupport.validate.
+   * NOTE(review): validate presumably throws NotAuthorizedException
+   * when impersonation is not permitted - confirm in ProxyUserSupport.
+   *
+   * @throws NotAuthorizedException if no requesting user can be determined
+   */
+  public void verifyUser() throws NotAuthorizedException {
+    String requestingUser = getRequestingUser();
+    if (requestingUser == null) {
+      String msg = "No user found.";
+      if (!UserGroupInformation.isSecurityEnabled()) // the hint about the user-name parameter is only added when security is off
+        msg += " Missing " + PseudoAuthenticator.USER_NAME + " parameter.";
+      throw new NotAuthorizedException(msg);
+    }
+    if(doAs != null && !doAs.equals(requestingUser)) {
+      /*if doAs user is different than logged in user, need to check that
+      that logged in user is authorized to run as 'doAs'*/
+      ProxyUserSupport.validate(requestingUser, getRequestingHost(requestingUser, request), doAs);
+    }
+  }
+  /**
+   * All 'tasks' spawned by WebHCat should be run as this user.  W/o doAs query parameter
+   * this is just the user making the request (or
+   * {@link org.apache.hadoop.security.authentication.client.PseudoAuthenticator#USER_NAME}
+   * query param).
+   * @return value of doAs query parameter or {@link #getRequestingUser()}
+   */
+  private String getDoAsUser() {
+    return doAs != null && !doAs.equals(getRequestingUser()) ? doAs : getRequestingUser();
+  }
+  /**
+   * Verify that the parameter exists.  Throw an exception if invalid.
+   * Only null is rejected; an empty string passes.
+   *
+   * @param param the value to check
+   * @param name  the parameter name used in the error message
+   * @throws BadParam if param is null
+   */
+  public void verifyParam(String param, String name)
+    throws BadParam {
+    if (param == null)
+      throw new BadParam("Missing " + name + " parameter");
+  }
+
+  /**
+   * Verify that the parameter exists.  Throw an exception if invalid.
+   * For list parameters both null and an empty list are rejected.
+   *
+   * @param param the list to check
+   * @param name  the parameter name used in the error message
+   * @throws BadParam if param is null or empty
+   */
+  public void verifyParam(List param, String name)
+    throws BadParam {
+    if (param == null || param.isEmpty())
+      throw new BadParam("Missing " + name + " parameter");
+  }
+
+  public static final Pattern DDL_ID = Pattern.compile("[a-zA-Z]\\w*"); // a letter followed by word characters; used with matches(), so the whole value must conform
+
+  /**
+   * Verify that the parameter exists and is a simple DDL identifier
+   * name.  Throw an exception if invalid.
+   *
+   * Bug: This needs to allow for quoted ddl identifiers.
+   *
+   * @param param the value to check
+   * @param name  the parameter name used in the error message
+   * @throws BadParam if param is null or does not fully match DDL_ID
+   */
+  public void verifyDdlParam(String param, String name)
+    throws BadParam {
+    verifyParam(param, name);
+    Matcher m = DDL_ID.matcher(param);
+    if (!m.matches())
+      throw new BadParam("Invalid DDL identifier " + name);
+  }
+  /**
+   * Get the user name from the security context, i.e. the user making the HTTP request.
+   * With simple/pseudo security mode this should return the
+   * value of user.name query param, in kerberos mode it's the kinit'ed user.
+   */
+  private String getRequestingUser() {
+    // No security context or no principal means there is no requesting user.
+    if (theSecurityContext == null)
+      return null;
+    if (theSecurityContext.getUserPrincipal() == null)
+      return null;
+    //map hue/foo.bar@something.com->hue since user group checks
+    // and config files are in terms of short name
+    return UserGroupInformation.createRemoteUser(
+      theSecurityContext.getUserPrincipal().getName()).getShortUserName();
+  }
+
+  /**
+   * The callback url on this server when a task is completed.
+   *
+   * Built as base URI + VERSION + "/internal/complete/$jobId", which is
+   * the path served by completeJob().  The "$jobId" placeholder is
+   * emitted literally.
+   * NOTE(review): presumably the job launcher substitutes the real job
+   * id for the placeholder - confirm in the delegators.
+   *
+   * @return the callback url, or null when no UriInfo or base URI is
+   *         available
+   */
+  public String getCompletedUrl() {
+    if (theUriInfo == null)
+      return null;
+    if (theUriInfo.getBaseUri() == null)
+      return null;
+    return theUriInfo.getBaseUri() + VERSION
+      + "/internal/complete/$jobId";
+  }
+  /**
+   * Returns canonical host name from which the request is made; used for doAs validation
+   *
+   * Never throws and never returns null: when the request is null, has
+   * no remote address, or the address cannot be resolved, a warning is
+   * logged and the placeholder "???" is returned instead.
+   *
+   * @param requestingUser user name, used only in log messages
+   * @param request the current servlet request; may be null
+   * @return the canonical host name of the remote address, or "???"
+   */
+  private static String getRequestingHost(String requestingUser, HttpServletRequest request) {
+    final String unkHost = "???";
+    if(request == null) {
+      LOG.warn("request is null; cannot determine hostname");
+      return unkHost;
+    }
+    try {
+      String address = request.getRemoteAddr();//returns IP addr
+      if(address == null) {
+        LOG.warn(MessageFormat.format("Request remote address is NULL for user [{0}]", requestingUser));
+        return unkHost;
+      }
+
+      //Inet4Address/Inet6Address
+      String hostName = InetAddress.getByName(address).getCanonicalHostName();
+      if(LOG.isDebugEnabled()) {
+        LOG.debug(MessageFormat.format("Resolved remote hostname: [{0}]", hostName));
+      }
+      return hostName;
+
+    } catch (UnknownHostException ex) {
+      // The exception goes to the logger (so the stack trace is recorded),
+      // not to MessageFormat, whose pattern has only the {0} placeholder.
+      LOG.warn(MessageFormat.format("Request remote address could not be resolved, {0}", ex.toString()), ex);
+      return unkHost;
+    }
+  }
+}
diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/SimpleExceptionMapper.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/SimpleExceptionMapper.java
new file mode 100644
index 0000000..0619b0f
--- /dev/null
+++ 
hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/SimpleExceptionMapper.java
@@ -0,0 +1,35 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hive.hcatalog.templeton;
+
+import javax.ws.rs.core.Response;
+import javax.ws.rs.ext.ExceptionMapper;
+import javax.ws.rs.ext.Provider;
+
+/**
+ * Map our exceptions to the Jersey response.  This lets us have nice
+ * results in the error body.
+ */
+@Provider
+public class SimpleExceptionMapper
+  implements ExceptionMapper<SimpleWebException> { // type argument is required: toResponse below takes SimpleWebException
+  public Response toResponse(SimpleWebException e) { // the exception builds its own JSON response
+    return e.getResponse();
+  }
+}
diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/SimpleWebException.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/SimpleWebException.java
new file mode 100644
index 0000000..697219b
--- /dev/null
+++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/SimpleWebException.java
@@ -0,0 +1,72 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.
The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hive.hcatalog.templeton;
+
+import java.io.IOException;
+import java.util.Map;
+import java.util.HashMap;
+import javax.ws.rs.core.MediaType;
+import javax.ws.rs.core.Response;
+
+import org.codehaus.jackson.map.ObjectMapper;
+
+/**
+ * Simple exception that will return a json error payload if thrown
+ * from a JAX web server.  We skip using WebApplicationException and
+ * instead map our own so that Jersey doesn't log our exceptions as
+ * error in the output log.  See SimpleExceptionMapper.
+ *
+ * Note that this type extends Throwable directly, so a
+ * catch (Exception e) clause does not catch it.
+ */
+public class SimpleWebException extends Throwable {
+  public int httpCode; // HTTP status used for the response
+  public Map params; // optional extra entries merged into the JSON body; may be null
+
+  public SimpleWebException(int httpCode, String msg) { // params stays null
+    super(msg);
+    this.httpCode = httpCode;
+  }
+
+  public SimpleWebException(int httpCode, String msg, Map params) {
+    super(msg);
+    this.httpCode = httpCode;
+    this.params = params;
+  }
+
+  public Response getResponse() { // response built from this exception's own code, params and message
+    return buildMessage(httpCode, params, getMessage());
+  }
+
+  public static Response buildMessage(int httpCode, Map params,
+                                      String msg) {
+    HashMap err = new HashMap();
+    err.put("error", msg);
+    if (params != null)
+      err.putAll(params); // added after "error", so a params entry named "error" replaces msg
+
+    String json = "\"error\""; // fallback body, used only if serialization fails
+    try {
+      json = new ObjectMapper().writeValueAsString(err);
+    } catch (IOException e) { // deliberately ignored: the fallback body above is sent instead
+    }
+
+    return Response.status(httpCode)
+      .entity(json)
+      .type(MediaType.APPLICATION_JSON)
+      .build();
+  }
+}
diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/StatusDelegator.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/StatusDelegator.java
new file mode 100644
index 0000000..b50b9c1
--- /dev/null
+++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/StatusDelegator.java
@@ -0,0 +1,111 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.
See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hive.hcatalog.templeton;
+
+import java.io.IOException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.shims.HadoopShims.WebHCatJTShim;
+import org.apache.hadoop.hive.shims.ShimLoader;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.mapred.JobID;
+import org.apache.hadoop.mapred.JobProfile;
+import org.apache.hadoop.mapred.JobStatus;
+import org.apache.hive.hcatalog.templeton.tool.JobState;
+
+/**
+ * Fetch the status of a given job id in the queue.
+ *
+ * run() is the per-request entry point; the static makeStatus()
+ * overloads and StringToJobID() are public helpers.
+ */
+public class StatusDelegator extends TempletonDelegator {
+  private static final Log LOG = LogFactory.getLog(StatusDelegator.class);
+
+  public StatusDelegator(AppConfig appConf) {
+    super(appConf);
+  }
+
+  public QueueStatusBean run(String user, String id) // looks up the status of job "id" on behalf of "user"
+    throws NotAuthorizedException, BadParam, IOException, InterruptedException
+  {
+    WebHCatJTShim tracker = null;
+    JobState state = null;
+    try {
+      UserGroupInformation ugi = UgiFactory.getUgi(user);
+      tracker = ShimLoader.getHadoopShims().getWebHCatShim(appConf, ugi);
+      JobID jobid = StatusDelegator.StringToJobID(id);
+      if (jobid == null)
+        throw new BadParam("Invalid jobid: " + id);
+      state = new JobState(id, Main.getAppConfigInstance()); // note: uses Main's config instance rather than this.appConf
+      return StatusDelegator.makeStatus(tracker, jobid, state);
+    } catch (IllegalStateException e) { // reported to the caller as a bad parameter
+      throw new BadParam(e.getMessage());
+    } finally { // both resources are released on every path on which they were opened
+      if (tracker != null)
+        tracker.close();
+      if (state != null)
+        state.close();
+    }
+  }
+
+  public static QueueStatusBean makeStatus(WebHCatJTShim tracker, // prefers the child job's status when a child id is given
+                                           JobID jobid,
+                                           String childid,
+                                           JobState state)
+    throws BadParam, IOException {
+    JobID bestid = jobid;
+    if (childid != null)
+      bestid = StatusDelegator.StringToJobID(childid); // an unparsable child id surfaces as BadParam here, before the retry below
+
+    JobStatus status = tracker.getJobStatus(bestid);
+    JobProfile profile = tracker.getJobProfile(bestid);
+
+    if (status == null || profile == null) {
+      if (bestid != jobid) { // Corrupt childid, retry.  (reference comparison: true once bestid was re-assigned above)
+        LOG.error("Corrupt child id " + childid + " for " + jobid);
+        bestid = jobid;
+        status = tracker.getJobStatus(bestid);
+        profile = tracker.getJobProfile(bestid);
+      }
+    }
+
+    if (status == null || profile == null) // No such job.
+      throw new BadParam("Could not find job " + bestid);
+
+    return new QueueStatusBean(state, status, profile);
+  }
+
+  public static QueueStatusBean makeStatus(WebHCatJTShim tracker, // convenience overload: child id is read from state
+                                           JobID jobid,
+                                           JobState state)
+    throws BadParam, IOException {
+    return makeStatus(tracker, jobid, state.getChildId(), state);
+  }
+
+  /**
+   * A version of JobID.forName with our app specific error handling.
+   *
+   * @param id the job id string to parse
+   * @return whatever JobID.forName returns for id
+   * @throws BadParam if JobID.forName throws IllegalArgumentException;
+   *         the original exception message is reused
+   */
+  public static JobID StringToJobID(String id)
+    throws BadParam {
+    try {
+      return JobID.forName(id);
+    } catch (IllegalArgumentException e) {
+      throw new BadParam(e.getMessage());
+    }
+  }
+}
diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/StreamingDelegator.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/StreamingDelegator.java
new file mode 100644
index 0000000..edef153
--- /dev/null
+++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/StreamingDelegator.java
@@ -0,0 +1,89 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.
See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hive.hcatalog.templeton;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.exec.ExecuteException;
+
+/**
+ * Submit a streaming job to the MapReduce queue.  Really just a front
+ * end to the JarDelegator.
+ *
+ * This is the backend of the mapreduce/streaming web service.
+ */
+public class StreamingDelegator extends LauncherDelegator {
+  public StreamingDelegator(AppConfig appConf) {
+    super(appConf);
+  }
+
+  /**
+   * Build the streaming command line and submit it through a
+   * JarDelegator using the configured streaming jar.
+   *
+   * No main class, libjars or files are passed to the JarDelegator;
+   * everything streaming-specific travels in the argument list built
+   * by makeArgs().  The defines are additionally passed to the
+   * JarDelegator as its own "defines" argument.
+   *
+   * @param user the user to run the job as
+   * @param inputs input paths, one "-input" option each
+   * @param output value of the "-output" option
+   * @param mapper value of the "-mapper" option
+   * @param reducer value of the "-reducer" option
+   * @param files values of the "-file" options
+   * @param defines "name=value" settings, emitted as "-Dname=value"
+   * @param cmdenvs values of the "-cmdenv" options
+   * @param jarArgs extra arguments appended unchanged
+   * @param statusdir passed through to the JarDelegator
+   * @param callback passed through to the JarDelegator
+   * @param completedUrl passed through to the JarDelegator
+   * @return whatever JarDelegator.run returns
+   */
+  public EnqueueBean run(String user,
+                         List<String> inputs, String output,
+                         String mapper, String reducer,
+                         List<String> files, List<String> defines,
+                         List<String> cmdenvs,
+                         List<String> jarArgs,
+                         String statusdir,
+                         String callback,
+                         String completedUrl)
+    throws NotAuthorizedException, BadParam, BusyException, QueueException,
+    ExecuteException, IOException, InterruptedException {
+    List<String> args = makeArgs(inputs, output, mapper, reducer,
+      files, defines, cmdenvs, jarArgs);
+
+    JarDelegator d = new JarDelegator(appConf);
+    return d.run(user,
+      appConf.streamingJar(), null,
+      null, null, args, defines,
+      statusdir, callback, completedUrl);
+  }
+
+  /**
+   * Assemble the argument list for the streaming jar.
+   *
+   * Every option that takes a value is emitted as two separate
+   * arguments (option, then value).  The exception is "-D", which is
+   * emitted in the fused "-Dname=value" form.
+   */
+  private List<String> makeArgs(List<String> inputs,
+                                String output,
+                                String mapper,
+                                String reducer,
+                                List<String> files,
+                                List<String> defines,
+                                List<String> cmdenvs,
+                                List<String> jarArgs) {
+    ArrayList<String> args = new ArrayList<String>();
+    for (String input : inputs) {
+      args.add("-input");
+      args.add(input);
+    }
+    args.add("-output");
+    args.add(output);
+    args.add("-mapper");
+    args.add(mapper);
+    args.add("-reducer");
+    args.add(reducer);
+
+    // "-file" and "-cmdenv" must be separate tokens from their values,
+    // exactly like "-input" above; "-file" + f produced one fused token.
+    for (String f : files) {
+      args.add("-file");
+      args.add(f);
+    }
+    for (String d : defines)
+      args.add("-D" + d);
+    for (String e : cmdenvs) {
+      args.add("-cmdenv");
+      args.add(e);
+    }
+    args.addAll(jarArgs);
+
+    return args;
+  }
+}
diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/TableDesc.java
hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/TableDesc.java new file mode 100644 index 0000000..9ee9b52 --- /dev/null +++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/TableDesc.java @@ -0,0 +1,245 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.templeton; + +import java.util.List; +import java.util.Map; +import javax.xml.bind.annotation.XmlRootElement; + +/** + * A description of the table to create. 
+ */ +@XmlRootElement +public class TableDesc extends GroupPermissionsDesc { + public boolean external = false; + public boolean ifNotExists = false; + public String table; + public String comment; + public List columns; + public List partitionedBy; + public ClusteredByDesc clusteredBy; + public StorageFormatDesc format; + public String location; + public Map tableProperties; + + /** + * Create a new TableDesc + */ + public TableDesc() { + } + + public String toString() { + return String.format("TableDesc(table=%s, columns=%s)", table, columns); + } + + public boolean equals(Object o) { + if (this == o) + return true; + if (!(o instanceof TableDesc)) + return false; + TableDesc that = (TableDesc) o; + return xequals(this.external, that.external) + && xequals(this.ifNotExists, that.ifNotExists) + && xequals(this.table, that.table) + && xequals(this.comment, that.comment) + && xequals(this.columns, that.columns) + && xequals(this.partitionedBy, that.partitionedBy) + && xequals(this.clusteredBy, that.clusteredBy) + && xequals(this.format, that.format) + && xequals(this.location, that.location) + && xequals(this.tableProperties, that.tableProperties) + && super.equals(that) + ; + } + + /** + * How to cluster the table. + */ + @XmlRootElement + public static class ClusteredByDesc { + public List columnNames; + public List sortedBy; + public int numberOfBuckets; + + public ClusteredByDesc() { + } + + public String toString() { + String fmt + = "ClusteredByDesc(columnNames=%s, sortedBy=%s, numberOfBuckets=%s)"; + return String.format(fmt, columnNames, sortedBy, numberOfBuckets); + } + + public boolean equals(Object o) { + if (this == o) + return true; + if (!(o instanceof ClusteredByDesc)) + return false; + ClusteredByDesc that = (ClusteredByDesc) o; + return xequals(this.columnNames, that.columnNames) + && xequals(this.sortedBy, that.sortedBy) + && xequals(this.numberOfBuckets, that.numberOfBuckets) + ; + } + } + + /** + * The clustered sort order. 
+ */ + @XmlRootElement + public static class ClusterSortOrderDesc { + public String columnName; + public SortDirectionDesc order; + + public ClusterSortOrderDesc() { + } + + public ClusterSortOrderDesc(String columnName, SortDirectionDesc order) { + this.columnName = columnName; + this.order = order; + } + + public String toString() { + return String + .format("ClusterSortOrderDesc(columnName=%s, order=%s)", + columnName, order); + } + + public boolean equals(Object o) { + if (this == o) + return true; + if (!(o instanceof ClusterSortOrderDesc)) + return false; + ClusterSortOrderDesc that = (ClusterSortOrderDesc) o; + return xequals(this.columnName, that.columnName) + && xequals(this.order, that.order) + ; + } + } + + /** + * Ther ASC or DESC sort order. + */ + @XmlRootElement + public static enum SortDirectionDesc { + ASC, DESC + } + + /** + * The storage format. + */ + @XmlRootElement + public static class StorageFormatDesc { + public RowFormatDesc rowFormat; + public String storedAs; + public StoredByDesc storedBy; + + public StorageFormatDesc() { + } + + public boolean equals(Object o) { + if (this == o) + return true; + if (!(o instanceof StorageFormatDesc)) + return false; + StorageFormatDesc that = (StorageFormatDesc) o; + return xequals(this.rowFormat, that.rowFormat) + && xequals(this.storedAs, that.storedAs) + && xequals(this.storedBy, that.storedBy) + ; + } + } + + /** + * The Row Format. 
+ */ + @XmlRootElement + public static class RowFormatDesc { + public String fieldsTerminatedBy; + public String collectionItemsTerminatedBy; + public String mapKeysTerminatedBy; + public String linesTerminatedBy; + public SerdeDesc serde; + + public RowFormatDesc() { + } + + public boolean equals(Object o) { + if (this == o) + return true; + if (!(o instanceof RowFormatDesc)) + return false; + RowFormatDesc that = (RowFormatDesc) o; + return xequals(this.fieldsTerminatedBy, that.fieldsTerminatedBy) + && xequals(this.collectionItemsTerminatedBy, + that.collectionItemsTerminatedBy) + && xequals(this.mapKeysTerminatedBy, that.mapKeysTerminatedBy) + && xequals(this.linesTerminatedBy, that.linesTerminatedBy) + && xequals(this.serde, that.serde) + ; + } + } + + /** + * The SERDE Row Format. + */ + @XmlRootElement + public static class SerdeDesc { + public String name; + public Map properties; + + public SerdeDesc() { + } + + public boolean equals(Object o) { + if (this == o) + return true; + if (!(o instanceof SerdeDesc)) + return false; + SerdeDesc that = (SerdeDesc) o; + return xequals(this.name, that.name) + && xequals(this.properties, that.properties) + ; + } + } + + /** + * How to store the table. 
+ */ + @XmlRootElement + public static class StoredByDesc { + public String className; + public Map properties; + + public StoredByDesc() { + } + + public boolean equals(Object o) { + if (this == o) + return true; + if (!(o instanceof StoredByDesc)) + return false; + StoredByDesc that = (StoredByDesc) o; + return xequals(this.className, that.className) + && xequals(this.properties, that.properties) + ; + } + } + +} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/TableLikeDesc.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/TableLikeDesc.java new file mode 100644 index 0000000..1e7e384 --- /dev/null +++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/TableLikeDesc.java @@ -0,0 +1,41 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.templeton; + +import javax.xml.bind.annotation.XmlRootElement; + +/** + * A description of the table to create that's like another table. 
+ */ +@XmlRootElement +public class TableLikeDesc extends GroupPermissionsDesc { + public boolean external = false; + public boolean ifNotExists = false; + public String location; + public String existingTable; + public String newTable; + + public TableLikeDesc() { + } + + public String toString() { + return String.format("TableLikeDesc(existingTable=%s, newTable=%s, location=%s", + existingTable, newTable, location); + } +} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/TablePropertyDesc.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/TablePropertyDesc.java new file mode 100644 index 0000000..adb2eb3 --- /dev/null +++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/TablePropertyDesc.java @@ -0,0 +1,37 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.templeton; + +import javax.xml.bind.annotation.XmlRootElement; + +/** + * A description of a table property. 
+ */ +@XmlRootElement +public class TablePropertyDesc extends GroupPermissionsDesc { + public String name; + public String value; + + public TablePropertyDesc() {} + + public String toString() { + return String.format("TablePropertyDesc(name=%s, value=%s)", + name, value); + } +} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/TempletonDelegator.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/TempletonDelegator.java new file mode 100644 index 0000000..8c60f57 --- /dev/null +++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/TempletonDelegator.java @@ -0,0 +1,32 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.templeton; + +/** + * The helper class for all the Templeton delegator classes. A + * delegator will call the underlying Templeton service such as hcat + * or hive. 
+ */
+public class TempletonDelegator {
+  /** Configuration handed in at construction; visible to subclasses. */
+  protected AppConfig appConf;
+
+  /**
+   * Remember the configuration for use by the concrete delegator.
+   *
+   * @param appConf stored as-is, not copied
+   */
+  public TempletonDelegator(AppConfig appConf) {
+    this.appConf = appConf;
+  }
+}
diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/UgiFactory.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/UgiFactory.java
new file mode 100644
index 0000000..4aac823
--- /dev/null
+++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/UgiFactory.java
@@ -0,0 +1,50 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */ +package org.apache.hive.hcatalog.templeton; + +import java.io.IOException; +import java.util.concurrent.ConcurrentHashMap; + +import org.apache.hadoop.security.UserGroupInformation; + +public class UgiFactory { + private static ConcurrentHashMap userUgiMap = + new ConcurrentHashMap(); + + public static UserGroupInformation getUgi(String user) throws IOException { + UserGroupInformation ugi = userUgiMap.get(user); + if (ugi == null) { + //create new ugi and add to map + final UserGroupInformation newUgi = + UserGroupInformation.createProxyUser(user, + UserGroupInformation.getLoginUser()); + + //if another thread adds an entry before the check in this one + // the one created here will not be added. + userUgiMap.putIfAbsent(user, newUgi); + + //use the UGI object that got added + return userUgiMap.get(user); + + } + return ugi; + } + + +} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/WadlConfig.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/WadlConfig.java new file mode 100644 index 0000000..872be65 --- /dev/null +++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/WadlConfig.java @@ -0,0 +1,41 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.templeton; + +import java.util.List; + +import com.sun.jersey.api.wadl.config.WadlGeneratorConfig; +import com.sun.jersey.api.wadl.config.WadlGeneratorDescription; +import com.sun.jersey.server.wadl.generators.resourcedoc.WadlGeneratorResourceDocSupport; + +/** + * Simple class that incorporates javadoc information into the + * wadl produced by jersey. + * + */ +public class WadlConfig extends WadlGeneratorConfig { + + @Override + public List configure() { + return generator(WadlGeneratorResourceDocSupport.class) + .prop("resourceDocStream", "resourcedoc.xml") + .descriptions(); + } + +} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/HDFSCleanup.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/HDFSCleanup.java new file mode 100644 index 0000000..a824628 --- /dev/null +++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/HDFSCleanup.java @@ -0,0 +1,151 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.hive.hcatalog.templeton.tool; + +import java.io.IOException; +import java.util.Date; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hive.hcatalog.templeton.tool.TempletonStorage.Type; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** + * This does periodic cleanup + */ +public class HDFSCleanup extends Thread { + protected Configuration appConf; + + // The interval to wake up and check the queue + public static final String HDFS_CLEANUP_INTERVAL = + "templeton.hdfs.cleanup.interval"; // 12 hours + + // The max age of a task allowed + public static final String HDFS_CLEANUP_MAX_AGE = + "templeton.hdfs.cleanup.maxage"; // ~ 1 week + + protected static long interval = 1000L * 60L * 60L * 12L; + protected static long maxage = 1000L * 60L * 60L * 24L * 7L; + + // The logger + private static final Log LOG = LogFactory.getLog(HDFSCleanup.class); + + // Handle to cancel loop + private boolean stop = false; + + // The instance + private static HDFSCleanup thisclass = null; + + // Whether the cycle is running + private static boolean isRunning = false; + + // The storage root + private String storage_root; + + /** + * Create a cleanup object. + */ + private HDFSCleanup(Configuration appConf) { + this.appConf = appConf; + interval = appConf.getLong(HDFS_CLEANUP_INTERVAL, interval); + maxage = appConf.getLong(HDFS_CLEANUP_MAX_AGE, maxage); + storage_root = appConf.get(TempletonStorage.STORAGE_ROOT); + } + + public static HDFSCleanup getInstance(Configuration appConf) { + if (thisclass != null) { + return thisclass; + } + thisclass = new HDFSCleanup(appConf); + return thisclass; + } + + public static void startInstance(Configuration appConf) throws IOException { + if (!isRunning) { + getInstance(appConf).start(); + } + } + + /** + * Run the cleanup loop. 
+ * + */ + public void run() { + FileSystem fs = null; + while (!stop) { + try { + // Put each check in a separate try/catch, so if that particular + // cycle fails, it'll try again on the next cycle. + try { + if (fs == null) { + fs = FileSystem.get(appConf); + } + checkFiles(fs); + } catch (Exception e) { + LOG.error("Cleanup cycle failed: " + e.getMessage()); + } + + long sleepMillis = (long) (Math.random() * interval); + LOG.info("Next execution: " + new Date(new Date().getTime() + + sleepMillis)); + Thread.sleep(sleepMillis); + + } catch (Exception e) { + // If sleep fails, we should exit now before things get worse. + isRunning = false; + LOG.error("Cleanup failed: " + e.getMessage(), e); + } + } + isRunning = false; + } + + /** + * Loop through all the files, deleting any that are older than + * maxage. + * + * @param fs + * @throws IOException + */ + private void checkFiles(FileSystem fs) throws IOException { + long now = new Date().getTime(); + for (Type type : Type.values()) { + try { + for (FileStatus status : fs.listStatus(new Path( + HDFSStorage.getPath(type, storage_root)))) { + if (now - status.getModificationTime() > maxage) { + LOG.info("Deleting " + status.getPath().toString()); + fs.delete(status.getPath(), true); + } + } + } catch (Exception e) { + // Nothing to find for this type. + } + } + } + + // Handle to stop this process from the outside if needed. + public void exit() { + stop = true; + } + +} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/HDFSStorage.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/HDFSStorage.java new file mode 100644 index 0000000..fdcc8c1 --- /dev/null +++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/HDFSStorage.java @@ -0,0 +1,257 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.templeton.tool; + +import java.io.BufferedReader; +import java.io.Closeable; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.OutputStreamWriter; +import java.io.PrintWriter; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +/** + * HDFS implementation of templeton storage. + * + * This implementation assumes that all keys in key/value pairs are + * chosen such that they don't have any newlines in them. 
+ * + */ +public class HDFSStorage implements TempletonStorage { + FileSystem fs = null; + + public String storage_root = null; + + public static final String JOB_PATH = "/jobs"; + public static final String JOB_TRACKINGPATH = "/created"; + public static final String OVERHEAD_PATH = "/overhead"; + + private static final Log LOG = LogFactory.getLog(HDFSStorage.class); + + public void startCleanup(Configuration config) { + try { + HDFSCleanup.startInstance(config); + } catch (Exception e) { + LOG.warn("Cleanup instance didn't start."); + } + } + + @Override + public void saveField(Type type, String id, String key, String val) + throws NotFoundException { + if (val == null) { + return; + } + PrintWriter out = null; + //todo: FileSystem#setPermission() - should this make sure to set 777 on jobs/ ? + Path keyfile= new Path(getPath(type) + "/" + id + "/" + key); + try { + // This will replace the old value if there is one + // Overwrite the existing file + out = new PrintWriter(new OutputStreamWriter(fs.create(keyfile))); + out.write(val); + out.flush(); + } catch (Exception e) { + String errMsg = "Couldn't write to " + keyfile + ": " + e.getMessage(); + LOG.error(errMsg, e); + throw new NotFoundException(errMsg, e); + } finally { + close(out); + } + } + + @Override + public String getField(Type type, String id, String key) { + BufferedReader in = null; + Path p = new Path(getPath(type) + "/" + id + "/" + key); + try { + in = new BufferedReader(new InputStreamReader(fs.open(p))); + String line = null; + String val = ""; + while ((line = in.readLine()) != null) { + if (!val.equals("")) { + val += "\n"; + } + val += line; + } + return val; + } catch (Exception e) { + LOG.info("Couldn't find " + p + ": " + e.getMessage(), e); + } finally { + close(in); + } + return null; + } + + @Override + public Map getFields(Type type, String id) { + HashMap map = new HashMap(); + BufferedReader in = null; + Path p = new Path(getPath(type) + "/" + id); + try { + for (FileStatus status : 
fs.listStatus(p)) { + in = new BufferedReader(new InputStreamReader(fs.open(status.getPath()))); + String line = null; + String val = ""; + while ((line = in.readLine()) != null) { + if (!val.equals("")) { + val += "\n"; + } + val += line; + } + map.put(status.getPath().getName(), val); + } + } catch (IOException e) { + LOG.trace("Couldn't find " + p); + } finally { + close(in); + } + return map; + } + + @Override + public boolean delete(Type type, String id) throws NotFoundException { + Path p = new Path(getPath(type) + "/" + id); + try { + fs.delete(p, true); + } catch (IOException e) { + throw new NotFoundException("Node " + p + " was not found: " + + e.getMessage()); + } + return false; + } + + @Override + public List getAll() { + ArrayList allNodes = new ArrayList(); + for (Type type : Type.values()) { + allNodes.addAll(getAllForType(type)); + } + return allNodes; + } + + @Override + public List getAllForType(Type type) { + ArrayList allNodes = new ArrayList(); + try { + for (FileStatus status : fs.listStatus(new Path(getPath(type)))) { + allNodes.add(status.getPath().getName()); + } + return null; + } catch (Exception e) { + LOG.trace("Couldn't find children for type " + type.toString()); + } + return allNodes; + } + + @Override + public List getAllForKey(String key, String value) { + ArrayList allNodes = new ArrayList(); + try { + for (Type type : Type.values()) { + allNodes.addAll(getAllForTypeAndKey(type, key, value)); + } + } catch (Exception e) { + LOG.trace("Couldn't find children for key " + key + ": " + + e.getMessage()); + } + return allNodes; + } + + @Override + public List getAllForTypeAndKey(Type type, String key, String value) { + ArrayList allNodes = new ArrayList(); + HashMap map = new HashMap(); + try { + for (FileStatus status : + fs.listStatus(new Path(getPath(type)))) { + map = (HashMap) + getFields(type, status.getPath().getName()); + if (map.get(key).equals(value)) { + allNodes.add(status.getPath().getName()); + } + } + } catch (Exception 
e) { + LOG.trace("Couldn't find children for key " + key + ": " + + e.getMessage()); + } + return allNodes; + } + + @Override + public void openStorage(Configuration config) throws IOException { + storage_root = config.get(TempletonStorage.STORAGE_ROOT); + if (fs == null) { + fs = FileSystem.get(config); + } + } + + @Override + public void closeStorage() throws IOException { + // Nothing to do here + } + + /** + * Get the path to storage based on the type. + * @param type + */ + public String getPath(Type type) { + return getPath(type, storage_root); + } + + /** + * Static method to get the path based on the type. + * + * @param type + * @param root + */ + public static String getPath(Type type, String root) { + String typepath = root + OVERHEAD_PATH; + switch (type) { + case JOB: + typepath = root + JOB_PATH; + break; + case JOBTRACKING: + typepath = root + JOB_TRACKINGPATH; + break; + } + return typepath; + } + private void close(Closeable is) { + if(is == null) { + return; + } + try { + is.close(); + } + catch (IOException ex) { + LOG.trace("Failed to close InputStream: " + ex.getMessage()); + } + } +} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/JobState.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/JobState.java new file mode 100644 index 0000000..f76465f --- /dev/null +++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/JobState.java @@ -0,0 +1,344 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.templeton.tool; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; + +/** + * The persistent state of a job. The state is stored in one of the + * supported storage systems. + */ +public class JobState { + + private static final Log LOG = LogFactory.getLog(JobState.class); + + private String id; + + // Storage is instantiated in the constructor + private TempletonStorage storage = null; + + private static TempletonStorage.Type type = TempletonStorage.Type.JOB; + + private Configuration config = null; + + public JobState(String id, Configuration conf) + throws IOException { + this.id = id; + config = conf; + storage = getStorage(conf); + } + + public void delete() + throws IOException { + try { + storage.delete(type, id); + } catch (Exception e) { + // Error getting children of node -- probably node has been deleted + LOG.info("Couldn't delete " + id); + } + } + + /** + * Get an instance of the selected storage class. Defaults to + * HDFS storage if none is specified. 
+ */ + public static TempletonStorage getStorageInstance(Configuration conf) { + TempletonStorage storage = null; + try { + storage = (TempletonStorage) + Class.forName(conf.get(TempletonStorage.STORAGE_CLASS)) + .newInstance(); + } catch (Exception e) { + LOG.warn("No storage method found: " + e.getMessage()); + try { + storage = new HDFSStorage(); + } catch (Exception ex) { + LOG.error("Couldn't create storage."); + } + } + return storage; + } + + /** + * Get an open instance of the selected storage class. Defaults + * to HDFS storage if none is specified. + */ + public static TempletonStorage getStorage(Configuration conf) throws IOException { + TempletonStorage storage = getStorageInstance(conf); + storage.openStorage(conf); + return storage; + } + + /** + * For storage methods that require a connection, this is a hint + * that it's time to close the connection. + */ + public void close() throws IOException { + storage.closeStorage(); + } + + // + // Properties + // + + /** + * This job id. + */ + public String getId() { + return id; + } + + /** + * The percent complete of a job + */ + public String getPercentComplete() + throws IOException { + return getField("percentComplete"); + } + + public void setPercentComplete(String percent) + throws IOException { + setField("percentComplete", percent); + } + + /** + * The child id of TempletonControllerJob + */ + public String getChildId() + throws IOException { + return getField("childid"); + } + + public void setChildId(String childid) + throws IOException { + setField("childid", childid); + } + + /** + * Add a jobid to the list of children of this job. + * + * @param jobid + * @throws IOException + */ + public void addChild(String jobid) throws IOException { + String jobids = ""; + try { + jobids = getField("children"); + } catch (Exception e) { + // There are none or they're not readable. 
+ } + if (!jobids.equals("")) { + jobids += ","; + } + jobids += jobid; + setField("children", jobids); + } + + /** + * Get a list of jobstates for jobs that are children of this job. + * @throws IOException + */ + public List getChildren() throws IOException { + ArrayList children = new ArrayList(); + for (String jobid : getField("children").split(",")) { + children.add(new JobState(jobid, config)); + } + return children; + } + + /** + * Save a comma-separated list of jobids that are children + * of this job. + * @param jobids + * @throws IOException + */ + public void setChildren(String jobids) throws IOException { + setField("children", jobids); + } + + /** + * Set the list of child jobs of this job + * @param children + */ + public void setChildren(List children) throws IOException { + String val = ""; + for (JobState jobstate : children) { + if (!val.equals("")) { + val += ","; + } + val += jobstate.getId(); + } + setField("children", val); + } + + /** + * The system exit value of the job. + */ + public Long getExitValue() + throws IOException { + return getLongField("exitValue"); + } + + public void setExitValue(long exitValue) + throws IOException { + setLongField("exitValue", exitValue); + } + + /** + * When this job was created. + */ + public Long getCreated() + throws IOException { + return getLongField("created"); + } + + public void setCreated(long created) + throws IOException { + setLongField("created", created); + } + + /** + * The user who started this job. + */ + public String getUser() + throws IOException { + return getField("user"); + } + + public void setUser(String user) + throws IOException { + setField("user", user); + } + + /** + * The url callback + */ + public String getCallback() + throws IOException { + return getField("callback"); + } + + public void setCallback(String callback) + throws IOException { + setField("callback", callback); + } + + /** + * The status of a job once it is completed. 
+ */ + public String getCompleteStatus() + throws IOException { + return getField("completed"); + } + + public void setCompleteStatus(String complete) + throws IOException { + setField("completed", complete); + } + + /** + * The time when the callback was sent. + */ + public Long getNotifiedTime() + throws IOException { + return getLongField("notified"); + } + + public void setNotifiedTime(long notified) + throws IOException { + setLongField("notified", notified); + } + + // + // Helpers + // + + /** + * Fetch an integer field from the store. + */ + public Long getLongField(String name) + throws IOException { + String s = storage.getField(type, id, name); + if (s == null) + return null; + else { + try { + return new Long(s); + } catch (NumberFormatException e) { + LOG.error("templeton: bug " + name + " " + s + " : " + e); + return null; + } + } + } + + /** + * Store a String field from the store. + */ + public void setField(String name, String val) + throws IOException { + try { + storage.saveField(type, id, name, val); + } catch (NotFoundException ne) { + throw new IOException(ne.getMessage()); + } + } + + public String getField(String name) + throws IOException { + return storage.getField(type, id, name); + } + + /** + * Store a long field. + * + * @param name + * @param val + * @throws IOException + */ + public void setLongField(String name, long val) + throws IOException { + try { + storage.saveField(type, id, name, String.valueOf(val)); + } catch (NotFoundException ne) { + throw new IOException("Job " + id + " was not found: " + + ne.getMessage()); + } + } + + /** + * Get an id for each currently existing job, which can be used to create + * a JobState object. 
+ * + * @param conf + * @throws IOException + */ + public static List getJobs(Configuration conf) throws IOException { + try { + return getStorage(conf).getAllForType(type); + } catch (Exception e) { + throw new IOException("Can't get jobs", e); + } + } +} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/JobStateTracker.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/JobStateTracker.java new file mode 100644 index 0000000..ad1880a --- /dev/null +++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/JobStateTracker.java @@ -0,0 +1,146 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.hive.hcatalog.templeton.tool; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.zookeeper.CreateMode; +import org.apache.zookeeper.KeeperException; +import org.apache.zookeeper.ZooKeeper; +import org.apache.zookeeper.ZooDefs.Ids; +import org.apache.zookeeper.data.Stat; + +public class JobStateTracker { + // The path to the tracking root + private String job_trackingroot = null; + + // The zookeeper connection to use + private ZooKeeper zk; + + // The id of the tracking node -- must be a SEQUENTIAL node + private String trackingnode; + + // The id of the job this tracking node represents + private String jobid; + + // The logger + private static final Log LOG = LogFactory.getLog(JobStateTracker.class); + + /** + * Constructor for a new node -- takes the jobid of an existing job + * + */ + public JobStateTracker(String node, ZooKeeper zk, boolean nodeIsTracker, + String job_trackingpath) { + this.zk = zk; + if (nodeIsTracker) { + trackingnode = node; + } else { + jobid = node; + } + job_trackingroot = job_trackingpath; + } + + /** + * Create the parent znode for this job state. 
+ */ + public void create() + throws IOException { + String[] paths = ZooKeeperStorage.getPaths(job_trackingroot); + for (String znode : paths) { + try { + zk.create(znode, new byte[0], + Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); + } catch (KeeperException.NodeExistsException e) { + } catch (Exception e) { + throw new IOException("Unable to create parent nodes"); + } + } + try { + trackingnode = zk.create(makeTrackingZnode(), jobid.getBytes(), + Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT_SEQUENTIAL); + } catch (Exception e) { + throw new IOException("Unable to create " + makeTrackingZnode()); + } + } + + public void delete() + throws IOException { + try { + zk.delete(makeTrackingJobZnode(trackingnode), -1); + } catch (Exception e) { + // Might have been deleted already + LOG.info("Couldn't delete " + makeTrackingJobZnode(trackingnode)); + } + } + + /** + * Get the jobid for this tracking node + * @throws IOException + */ + public String getJobID() throws IOException { + try { + return new String(zk.getData(makeTrackingJobZnode(trackingnode), + false, new Stat())); + } catch (KeeperException e) { + // It was deleted during the transaction + throw new IOException("Node already deleted " + trackingnode); + } catch (InterruptedException e) { + throw new IOException("Couldn't read node " + trackingnode); + } + } + + /** + * Make a ZK path to a new tracking node + */ + public String makeTrackingZnode() { + return job_trackingroot + "/"; + } + + /** + * Make a ZK path to an existing tracking node + */ + public String makeTrackingJobZnode(String nodename) { + return job_trackingroot + "/" + nodename; + } + + /* + * Get the list of tracking jobs. These can be used to determine which jobs have + * expired. 
+ */ + public static List getTrackingJobs(Configuration conf, ZooKeeper zk) + throws IOException { + ArrayList jobs = new ArrayList(); + try { + for (String myid : zk.getChildren( + conf.get(TempletonStorage.STORAGE_ROOT) + + ZooKeeperStorage.TRACKINGDIR, false)) { + jobs.add(myid); + } + } catch (Exception e) { + throw new IOException("Can't get tracking children", e); + } + return jobs; + } +} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/NotFoundException.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/NotFoundException.java new file mode 100644 index 0000000..4949098 --- /dev/null +++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/NotFoundException.java @@ -0,0 +1,33 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.templeton.tool; + +/** + * Simple not found exception. 
+ */ +public class NotFoundException extends Exception { + private static final long serialVersionUID = 1L; + + public NotFoundException(String msg) { + super(msg); + } + public NotFoundException(String msg, Throwable rootCause) { + super(msg, rootCause); + } +} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/NullRecordReader.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/NullRecordReader.java new file mode 100644 index 0000000..8fe2184 --- /dev/null +++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/NullRecordReader.java @@ -0,0 +1,62 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.templeton.tool; + +import java.io.IOException; + +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.mapreduce.InputSplit; +import org.apache.hadoop.mapreduce.RecordReader; +import org.apache.hadoop.mapreduce.TaskAttemptContext; + +/** + * An empty record reader. 
+ */ +public class NullRecordReader + extends RecordReader { + @Override + public void initialize(InputSplit genericSplit, TaskAttemptContext context) + throws IOException { + } + + @Override + public void close() throws IOException { + } + + @Override + public NullWritable getCurrentKey() { + return NullWritable.get(); + } + + @Override + public NullWritable getCurrentValue() { + return NullWritable.get(); + } + + @Override + public float getProgress() { + return 1.0f; + } + + @Override + public boolean nextKeyValue() throws IOException { + return false; + } +} + diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/NullSplit.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/NullSplit.java new file mode 100644 index 0000000..e2cf5f9 --- /dev/null +++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/NullSplit.java @@ -0,0 +1,43 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.hive.hcatalog.templeton.tool; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.mapreduce.InputSplit; + +/** + * An empty splitter. + */ +public class NullSplit extends InputSplit implements Writable { + public long getLength() { return 0; } + + public String[] getLocations() throws IOException { + return new String[]{}; + } + + @Override + public void write(DataOutput out) throws IOException {} + + @Override + public void readFields(DataInput in) throws IOException {} +} + diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/SingleInputFormat.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/SingleInputFormat.java new file mode 100644 index 0000000..a638174 --- /dev/null +++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/SingleInputFormat.java @@ -0,0 +1,51 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.hive.hcatalog.templeton.tool; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.mapreduce.InputFormat; +import org.apache.hadoop.mapreduce.InputSplit; +import org.apache.hadoop.mapreduce.JobContext; +import org.apache.hadoop.mapreduce.RecordReader; +import org.apache.hadoop.mapreduce.TaskAttemptContext; + +/** + * An empty InputFormat. + */ +public class SingleInputFormat + extends InputFormat { + public List getSplits(JobContext job) + throws IOException { + List res = new ArrayList(); + res.add(new NullSplit()); + return res; + } + + public RecordReader + createRecordReader(InputSplit split, + TaskAttemptContext context) + throws IOException { + return new NullRecordReader(); + } +} + diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonControllerJob.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonControllerJob.java new file mode 100644 index 0000000..cb0bf71 --- /dev/null +++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonControllerJob.java @@ -0,0 +1,351 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.templeton.tool; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.OutputStream; +import java.io.PrintWriter; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapred.JobClient; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.JobID; +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat; +import org.apache.hadoop.mapreduce.security.token.delegation.DelegationTokenIdentifier; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.security.token.Token; +import org.apache.hadoop.util.Tool; +import org.apache.hadoop.util.ToolRunner; + +/** + * A Map Reduce job that will start another job. + * + * We have a single Mapper job that starts a child MR job. The parent + * monitors the child child job and ends when the child job exits. In + * addition, we + * + * - write out the parent job id so the caller can record it. + * - run a keep alive thread so the job doesn't end. + * - Optionally, store the stdout, stderr, and exit value of the child + * in hdfs files. 
+ */ +public class TempletonControllerJob extends Configured implements Tool { + public static final String COPY_NAME = "templeton.copy"; + public static final String STATUSDIR_NAME = "templeton.statusdir"; + public static final String JAR_ARGS_NAME = "templeton.args"; + public static final String OVERRIDE_CLASSPATH = "templeton.override-classpath"; + + public static final String STDOUT_FNAME = "stdout"; + public static final String STDERR_FNAME = "stderr"; + public static final String EXIT_FNAME = "exit"; + + public static final int WATCHER_TIMEOUT_SECS = 10; + public static final int KEEP_ALIVE_MSEC = 60 * 1000; + + public static final String TOKEN_FILE_ARG_PLACEHOLDER + = "__WEBHCAT_TOKEN_FILE_LOCATION__"; + + + private static TrivialExecService execService = TrivialExecService.getInstance(); + + private static final Log LOG = LogFactory.getLog(TempletonControllerJob.class); + + + public static class LaunchMapper + extends Mapper { + protected Process startJob(Context context, String user, + String overrideClasspath) + throws IOException, InterruptedException { + Configuration conf = context.getConfiguration(); + copyLocal(COPY_NAME, conf); + String[] jarArgs + = TempletonUtils.decodeArray(conf.get(JAR_ARGS_NAME)); + + ArrayList removeEnv = new ArrayList(); + removeEnv.add("HADOOP_ROOT_LOGGER"); + Map env = TempletonUtils.hadoopUserEnv(user, + overrideClasspath); + List jarArgsList = new LinkedList(Arrays.asList(jarArgs)); + String tokenFile = System.getenv("HADOOP_TOKEN_FILE_LOCATION"); + + + if (tokenFile != null) { + //Token is available, so replace the placeholder + String tokenArg = "mapreduce.job.credentials.binary=" + tokenFile; + for(int i=0; i it = jarArgsList.iterator(); + while(it.hasNext()){ + String arg = it.next(); + if(arg.contains(TOKEN_FILE_ARG_PLACEHOLDER)){ + it.remove(); + } + } + } + return execService.run(jarArgsList, removeEnv, env); + } + + private void copyLocal(String var, Configuration conf) + throws IOException { + String[] filenames 
= TempletonUtils.decodeArray(conf.get(var)); + if (filenames != null) { + for (String filename : filenames) { + Path src = new Path(filename); + Path dst = new Path(src.getName()); + FileSystem fs = src.getFileSystem(conf); + System.err.println("templeton: copy " + src + " => " + dst); + fs.copyToLocalFile(src, dst); + } + } + } + + @Override + public void run(Context context) + throws IOException, InterruptedException { + + Configuration conf = context.getConfiguration(); + + Process proc = startJob(context, + conf.get("user.name"), + conf.get(OVERRIDE_CLASSPATH)); + + String statusdir = conf.get(STATUSDIR_NAME); + + if (statusdir != null) { + statusdir = TempletonUtils.addUserHomeDirectoryIfApplicable(statusdir, conf.get("user.name"), conf); + } + + ExecutorService pool = Executors.newCachedThreadPool(); + executeWatcher(pool, conf, context.getJobID(), + proc.getInputStream(), statusdir, STDOUT_FNAME); + executeWatcher(pool, conf, context.getJobID(), + proc.getErrorStream(), statusdir, STDERR_FNAME); + KeepAlive keepAlive = startCounterKeepAlive(pool, context); + + proc.waitFor(); + keepAlive.sendReport = false; + pool.shutdown(); + if (!pool.awaitTermination(WATCHER_TIMEOUT_SECS, TimeUnit.SECONDS)) + pool.shutdownNow(); + + writeExitValue(conf, proc.exitValue(), statusdir); + JobState state = new JobState(context.getJobID().toString(), conf); + state.setExitValue(proc.exitValue()); + state.setCompleteStatus("done"); + state.close(); + + if (proc.exitValue() != 0) + System.err.println("templeton: job failed with exit code " + + proc.exitValue()); + else + System.err.println("templeton: job completed with exit code 0"); + } + + private void executeWatcher(ExecutorService pool, Configuration conf, + JobID jobid, InputStream in, String statusdir, + String name) + throws IOException { + Watcher w = new Watcher(conf, jobid, in, statusdir, name); + pool.execute(w); + } + + private KeepAlive startCounterKeepAlive(ExecutorService pool, Context cnt) + throws IOException { 
+ KeepAlive k = new KeepAlive(cnt); + pool.execute(k); + return k; + } + + private void writeExitValue(Configuration conf, int exitValue, String statusdir) + throws IOException { + if (TempletonUtils.isset(statusdir)) { + Path p = new Path(statusdir, EXIT_FNAME); + FileSystem fs = p.getFileSystem(conf); + OutputStream out = fs.create(p); + System.err.println("templeton: Writing exit value " + + exitValue + " to " + p); + PrintWriter writer = new PrintWriter(out); + writer.println(exitValue); + writer.close(); + } + } + } + + private static class Watcher implements Runnable { + private InputStream in; + private OutputStream out; + private JobID jobid; + private Configuration conf; + + public Watcher(Configuration conf, JobID jobid, InputStream in, + String statusdir, String name) + throws IOException { + this.conf = conf; + this.jobid = jobid; + this.in = in; + + if (name.equals(STDERR_FNAME)) + out = System.err; + else + out = System.out; + + if (TempletonUtils.isset(statusdir)) { + Path p = new Path(statusdir, name); + FileSystem fs = p.getFileSystem(conf); + out = fs.create(p); + System.err.println("templeton: Writing status to " + p); + } + } + + @Override + public void run() { + try { + InputStreamReader isr = new InputStreamReader(in); + BufferedReader reader = new BufferedReader(isr); + PrintWriter writer = new PrintWriter(out); + + String line; + while ((line = reader.readLine()) != null) { + writer.println(line); + JobState state = null; + try { + String percent = TempletonUtils.extractPercentComplete(line); + String childid = TempletonUtils.extractChildJobId(line); + + if (percent != null || childid != null) { + state = new JobState(jobid.toString(), conf); + state.setPercentComplete(percent); + state.setChildId(childid); + } + } catch (IOException e) { + System.err.println("templeton: state error: " + e); + } finally { + if (state != null) { + try { + state.close(); + } catch (IOException e) { + } + } + } + } + writer.flush(); + } catch (IOException e) { 
+ System.err.println("templeton: execute error: " + e); + } + } + } + + private static class KeepAlive implements Runnable { + private final Mapper.Context cnt; + private volatile boolean sendReport; + + public KeepAlive(Mapper.Context cnt) { + this.cnt = cnt; + this.sendReport = true; + } + + @Override + public void run() { + try { + while (sendReport) { + cnt.progress(); + Thread.sleep(KEEP_ALIVE_MSEC); + } + } catch (InterruptedException e) { + // Ok to be interrupted + } + } + } + + private JobID submittedJobId; + + public String getSubmittedId() { + if (submittedJobId == null) + return null; + else + return submittedJobId.toString(); + } + + /** + * Enqueue the job and print out the job id for later collection. + */ + @Override + public int run(String[] args) + throws IOException, InterruptedException, ClassNotFoundException { + Configuration conf = getConf(); + conf.set(JAR_ARGS_NAME, TempletonUtils.encodeArray(args)); + conf.set("user.name", UserGroupInformation.getCurrentUser().getShortUserName()); + Job job = new Job(conf); + job.setJarByClass(TempletonControllerJob.class); + job.setJobName("TempletonControllerJob"); + job.setMapperClass(LaunchMapper.class); + job.setMapOutputKeyClass(Text.class); + job.setMapOutputValueClass(Text.class); + job.setInputFormatClass(SingleInputFormat.class); + NullOutputFormat of + = new NullOutputFormat(); + job.setOutputFormatClass(of.getClass()); + job.setNumReduceTasks(0); + + JobClient jc = new JobClient(new JobConf(job.getConfiguration())); + + Token mrdt = jc.getDelegationToken(new Text("mr token")); + job.getCredentials().addToken(new Text("mr token"), mrdt); + job.submit(); + + submittedJobId = job.getJobID(); + + return 0; + } + + + public static void main(String[] args) throws Exception { + int ret = ToolRunner.run(new TempletonControllerJob(), args); + if (ret != 0) + System.err.println("TempletonControllerJob failed!"); + System.exit(ret); + } +} diff --git 
hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonStorage.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonStorage.java new file mode 100644 index 0000000..ba08556 --- /dev/null +++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonStorage.java @@ -0,0 +1,153 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.templeton.tool; + +import java.io.IOException; +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.conf.Configuration; + +/** + * An interface to handle different Templeton storage methods, including + * ZooKeeper and HDFS. Any storage scheme must be able to handle being + * run in an HDFS environment, where specific file systems and virtual + * machines may not be available. + * + * Storage is done individually in a hierarchy: type (the data type, + * as listed below), then the id (a given jobid, jobtrackingid, etc.), + * then the key/value pairs. So an entry might look like: + * + * JOB + * jobid00035 + * user -> rachel + * datecreated -> 2/5/12 + * etc. + * + * Each field must be available to be fetched/changed individually. 
+ */ +public interface TempletonStorage { + // These are the possible types referenced by 'type' below. + public enum Type { + UNKNOWN, JOB, JOBTRACKING, TEMPLETONOVERHEAD + } + + public static final String STORAGE_CLASS = "templeton.storage.class"; + public static final String STORAGE_ROOT = "templeton.storage.root"; + + /** + * Start the cleanup process for this storage type. + * @param config + */ + public void startCleanup(Configuration config); + + /** + * Save a single key/value pair for a specific job id. + * @param type The data type (as listed above) + * @param id The String id of this data grouping (jobid, etc.) + * @param key The name of the field to save + * @param val The value of the field to save + */ + public void saveField(Type type, String id, String key, String val) + throws NotFoundException; + + /** + * Get the value of one field for a given data type. If the type + * is UNKNOWN, search for the id in all types. + * @param type The data type (as listed above) + * @param id The String id of this data grouping (jobid, etc.) + * @param key The name of the field to retrieve + * @return The value of the field requested, or null if not + * found. + */ + public String getField(Type type, String id, String key); + + /** + * Get all the name/value pairs stored for this id. + * Be careful using getFields() -- optimistic locking will mean that + * your odds of a conflict are decreased if you read/write one field + * at a time. getFields() is intended for read-only usage. + * + * If the type is UNKNOWN, search for the id in all types. + * + * @param type The data type (as listed above) + * @param id The String id of this data grouping (jobid, etc.) + * @return A Map of key/value pairs found for this type/id. + */ + public Map getFields(Type type, String id); + + /** + * Delete a data grouping (all data for a jobid, all tracking data + * for a job, etc.). If the type is UNKNOWN, search for the id + * in all types. 
+ * + * @param type The data type (as listed above) + * @param id The String id of this data grouping (jobid, etc.) + * @return True if successful, false if not, throws NotFoundException + * if the id wasn't found. + */ + public boolean delete(Type type, String id) throws NotFoundException; + + /** + * Get the id of each data grouping in the storage system. + * + * @return An ArrayList of ids. + */ + public List getAll(); + + /** + * Get the id of each data grouping of a given type in the storage + * system. + * @param type The data type (as listed above) + * @return An ArrayList of ids. + */ + public List getAllForType(Type type); + + /** + * Get the id of each data grouping that has the specific key/value + * pair. + * @param key The name of the field to search for + * @param value The value of the field to search for + * @return An ArrayList of ids. + */ + public List getAllForKey(String key, String value); + + /** + * Get the id of each data grouping of a given type that has the + * specific key/value pair. + * @param type The data type (as listed above) + * @param key The name of the field to search for + * @param value The value of the field to search for + * @return An ArrayList of ids. + */ + public List getAllForTypeAndKey(Type type, String key, + String value); + + /** + * For storage methods that require a connection, this is a hint + * that it's time to open a connection. + */ + public void openStorage(Configuration config) throws IOException; + + /** + * For storage methods that require a connection, this is a hint + * that it's time to close the connection. 
+ */ + public void closeStorage() throws IOException; +} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonUtils.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonUtils.java new file mode 100644 index 0000000..61002b9 --- /dev/null +++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TempletonUtils.java @@ -0,0 +1,302 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.hive.hcatalog.templeton.tool; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.net.URI; +import java.net.URISyntaxException; +import java.net.URL; +import java.net.URLConnection; +import java.security.PrivilegedExceptionAction; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocalFileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.DistributedFileSystem; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.util.StringUtils; +import org.apache.hive.hcatalog.templeton.UgiFactory; + +/** + * General utility methods. + */ +public class TempletonUtils { + /** + * Is the object non-empty? + */ + public static boolean isset(String s) { + return (s != null) && (s.length() > 0); + } + + /** + * Is the object non-empty? + */ + public static boolean isset(char ch) { + return (ch != 0); + } + + /** + * Is the object non-empty? + */ + public static boolean isset(T[] a) { + return (a != null) && (a.length > 0); + } + + + /** + * Is the object non-empty? + */ + public static boolean isset(Collection col) { + return (col != null) && (!col.isEmpty()); + } + + /** + * Is the object non-empty? + */ + public static boolean isset(Map col) { + return (col != null) && (!col.isEmpty()); + } + + + public static final Pattern JAR_COMPLETE + = Pattern.compile(" map \\d+%\\s+reduce \\d+%$"); + public static final Pattern PIG_COMPLETE = Pattern.compile(" \\d+% complete$"); + + /** + * Extract the percent complete line from Pig or Jar jobs. 
+ */ + public static String extractPercentComplete(String line) { + Matcher jar = JAR_COMPLETE.matcher(line); + if (jar.find()) + return jar.group().trim(); + + Matcher pig = PIG_COMPLETE.matcher(line); + if (pig.find()) + return pig.group().trim(); + + return null; + } + + public static final Pattern JAR_ID = Pattern.compile(" Running job: (\\S+)$"); + public static final Pattern PIG_ID = Pattern.compile(" HadoopJobId: (\\S+)$"); + public static final Pattern[] ID_PATTERNS = {JAR_ID, PIG_ID}; + + /** + * Extract the job id from jar jobs. + */ + public static String extractChildJobId(String line) { + for (Pattern p : ID_PATTERNS) { + Matcher m = p.matcher(line); + if (m.find()) + return m.group(1); + } + + return null; + } + + /** + * Take an array of strings and encode it into one string. + */ + public static String encodeArray(String[] plain) { + if (plain == null) + return null; + + String[] escaped = new String[plain.length]; + + for (int i = 0; i < plain.length; ++i) { + if (plain[i] == null) { + plain[i] = ""; + } + escaped[i] = StringUtils.escapeString(plain[i]); + } + + return StringUtils.arrayToString(escaped); + } + + /** + * Encode a List into a string. + */ + public static String encodeArray(List list) { + if (list == null) + return null; + String[] array = new String[list.size()]; + return encodeArray(list.toArray(array)); + } + + /** + * Take an encode strings and decode it into an array of strings. 
+ */ + public static String[] decodeArray(String s) { + if (s == null) + return null; + + String[] escaped = StringUtils.split(s); + String[] plain = new String[escaped.length]; + + for (int i = 0; i < escaped.length; ++i) + plain[i] = StringUtils.unEscapeString(escaped[i]); + + return plain; + } + + public static String[] hadoopFsListAsArray(String files, Configuration conf, + String user) + throws URISyntaxException, FileNotFoundException, IOException, + InterruptedException { + if (files == null || conf == null) { + return null; + } + String[] dirty = files.split(","); + String[] clean = new String[dirty.length]; + + for (int i = 0; i < dirty.length; ++i) + clean[i] = hadoopFsFilename(dirty[i], conf, user); + + return clean; + } + + public static String hadoopFsListAsString(String files, Configuration conf, + String user) + throws URISyntaxException, FileNotFoundException, IOException, + InterruptedException { + if (files == null || conf == null) { + return null; + } + return StringUtils.arrayToString(hadoopFsListAsArray(files, conf, user)); + } + + public static String hadoopFsFilename(String fname, Configuration conf, String user) + throws URISyntaxException, FileNotFoundException, IOException, + InterruptedException { + Path p = hadoopFsPath(fname, conf, user); + if (p == null) + return null; + else + return p.toString(); + } + + /** + * @return true iff we are sure the file is not there. + */ + public static boolean hadoopFsIsMissing(FileSystem fs, Path p) { + try { + return !fs.exists(p); + } catch (Throwable t) { + // Got an error, might be there anyway due to a + // permissions problem. + return false; + } + } + + public static String addUserHomeDirectoryIfApplicable(String origPathStr, String user, Configuration conf) throws IOException { + Path path = new Path(origPathStr); + String result = origPathStr; + + // shortcut for s3/asv + // If path contains scheme, user should mean an absolute path, + // However, path.isAbsolute tell us otherwise. 
+ // So we skip conversion for non-hdfs. + if (!(path.getFileSystem(conf) instanceof DistributedFileSystem)&& + !(path.getFileSystem(conf) instanceof LocalFileSystem)) { + return result; + } + if (!path.isAbsolute()) { + result = "/user/" + user + "/" + origPathStr; + } + return result; + } + + public static Path hadoopFsPath(String fname, final Configuration conf, String user) + throws URISyntaxException, IOException, + InterruptedException { + if (fname == null || conf == null) { + return null; + } + + UserGroupInformation ugi; + if (user!=null) { + ugi = UgiFactory.getUgi(user); + } else { + ugi = UserGroupInformation.getLoginUser(); + } + final String finalFName = new String(fname); + + final FileSystem defaultFs = + ugi.doAs(new PrivilegedExceptionAction() { + public FileSystem run() + throws URISyntaxException, IOException, InterruptedException { + return FileSystem.get(new URI(finalFName), conf); + } + }); + + fname = addUserHomeDirectoryIfApplicable(fname, user, conf); + URI u = new URI(fname); + Path p = new Path(u).makeQualified(defaultFs); + + if (hadoopFsIsMissing(defaultFs, p)) + throw new FileNotFoundException("File " + fname + " does not exist."); + + return p; + } + + /** + * GET the given url. Returns the number of bytes received. + */ + public static int fetchUrl(URL url) + throws IOException { + URLConnection cnx = url.openConnection(); + InputStream in = cnx.getInputStream(); + + byte[] buf = new byte[8192]; + int total = 0; + int len = 0; + while ((len = in.read(buf)) >= 0) + total += len; + + return total; + } + + /** + * Set the environment variables to specify the hadoop user. 
+ */ + public static Map hadoopUserEnv(String user, + String overrideClasspath) { + HashMap env = new HashMap(); + env.put("HADOOP_USER_NAME", user); + + if (overrideClasspath != null) { + env.put("HADOOP_USER_CLASSPATH_FIRST", "true"); + String cur = System.getenv("HADOOP_CLASSPATH"); + if (TempletonUtils.isset(cur)) + overrideClasspath = overrideClasspath + ":" + cur; + env.put("HADOOP_CLASSPATH", overrideClasspath); + } + + return env; + } +} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TrivialExecService.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TrivialExecService.java new file mode 100644 index 0000000..45b2bdc --- /dev/null +++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TrivialExecService.java @@ -0,0 +1,56 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.templeton.tool; + +import java.io.IOException; +import java.util.List; +import java.util.Map; + +/** + * Execute a local program. This is a singleton service that will + * execute a programs on the local box. 
+ */ +public class TrivialExecService { + private static volatile TrivialExecService theSingleton; + + /** + * Retrieve the singleton. + */ + public static synchronized TrivialExecService getInstance() { + if (theSingleton == null) + theSingleton = new TrivialExecService(); + return theSingleton; + } + + public Process run(List cmd, List removeEnv, + Map environmentVariables) + throws IOException { + System.err.println("templeton: starting " + cmd); + System.err.print("With environment variables: "); + for (Map.Entry keyVal : environmentVariables.entrySet()) { + System.err.println(keyVal.getKey() + "=" + keyVal.getValue()); + } + ProcessBuilder pb = new ProcessBuilder(cmd); + for (String key : removeEnv) + pb.environment().remove(key); + pb.environment().putAll(environmentVariables); + return pb.start(); + } + +} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/ZooKeeperCleanup.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/ZooKeeperCleanup.java new file mode 100644 index 0000000..1452390 --- /dev/null +++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/ZooKeeperCleanup.java @@ -0,0 +1,199 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.templeton.tool; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Date; + +import org.apache.hadoop.conf.Configuration; +import org.apache.zookeeper.ZooKeeper; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** + * This does periodic cleanup + */ +public class ZooKeeperCleanup extends Thread { + protected Configuration appConf; + + // The interval to wake up and check the queue + public static final String ZK_CLEANUP_INTERVAL = + "templeton.zookeeper.cleanup.interval"; // 12 hours + + // The max age of a task allowed + public static final String ZK_CLEANUP_MAX_AGE = + "templeton.zookeeper.cleanup.maxage"; // ~ 1 week + + protected static long interval = 1000L * 60L * 60L * 12L; + protected static long maxage = 1000L * 60L * 60L * 24L * 7L; + + // The logger + private static final Log LOG = LogFactory.getLog(ZooKeeperCleanup.class); + + // Handle to cancel loop + private boolean stop = false; + + // The instance + private static ZooKeeperCleanup thisclass = null; + + // Whether the cycle is running + private static boolean isRunning = false; + + /** + * Create a cleanup object. We use the appConfig to configure JobState. 
+ * @param appConf + */ + private ZooKeeperCleanup(Configuration appConf) { + this.appConf = appConf; + interval = appConf.getLong(ZK_CLEANUP_INTERVAL, interval); + maxage = appConf.getLong(ZK_CLEANUP_MAX_AGE, maxage); + } + + public static ZooKeeperCleanup getInstance(Configuration appConf) { + if (thisclass != null) { + return thisclass; + } + thisclass = new ZooKeeperCleanup(appConf); + return thisclass; + } + + public static void startInstance(Configuration appConf) throws IOException { + if (!isRunning) { + getInstance(appConf).start(); + } + } + + /** + * Run the cleanup loop. + * + * @throws IOException + */ + public void run() { + ZooKeeper zk = null; + List nodes = null; + isRunning = true; + while (!stop) { + try { + // Put each check in a separate try/catch, so if that particular + // cycle fails, it'll try again on the next cycle. + try { + zk = ZooKeeperStorage.zkOpen(appConf); + + nodes = getChildList(zk); + + for (String node : nodes) { + boolean deleted = checkAndDelete(node, zk); + if (!deleted) { + break; + } + } + + zk.close(); + } catch (Exception e) { + LOG.error("Cleanup cycle failed: " + e.getMessage()); + } finally { + if (zk != null) { + try { + zk.close(); + } catch (InterruptedException e) { + // We're trying to exit anyway, just ignore. + } + } + } + + long sleepMillis = (long) (Math.random() * interval); + LOG.info("Next execution: " + new Date(new Date().getTime() + + sleepMillis)); + Thread.sleep(sleepMillis); + + } catch (Exception e) { + // If sleep fails, we should exit now before things get worse. 
+ isRunning = false; + LOG.error("Cleanup failed: " + e.getMessage(), e); + } + } + isRunning = false; + } + + /** + * Get the list of jobs from JobState + * + * @throws IOException + */ + public List getChildList(ZooKeeper zk) { + try { + List jobs = JobStateTracker.getTrackingJobs(appConf, zk); + Collections.sort(jobs); + return jobs; + } catch (IOException e) { + LOG.info("No jobs to check."); + } + return new ArrayList(); + } + + /** + * Check to see if a job is more than maxage old, and delete it if so. + */ + public boolean checkAndDelete(String node, ZooKeeper zk) { + JobState state = null; + try { + JobStateTracker tracker = new JobStateTracker(node, zk, true, + appConf.get(TempletonStorage.STORAGE_ROOT + + ZooKeeperStorage.TRACKINGDIR)); + long now = new Date().getTime(); + state = new JobState(tracker.getJobID(), appConf); + + // Set the default to 0 -- if the created date is null, there was + // an error in creation, and we want to delete it anyway. + long then = 0; + if (state.getCreated() != null) { + then = state.getCreated(); + } + if (now - then > maxage) { + LOG.info("Deleting " + tracker.getJobID()); + state.delete(); + tracker.delete(); + return true; + } + return false; + } catch (Exception e) { + LOG.info("checkAndDelete failed for " + node); + // We don't throw a new exception for this -- just keep going with the + // next one. + return true; + } finally { + if (state != null) { + try { + state.close(); + } catch (IOException e) { + LOG.info("Couldn't close job state."); + } + } + } + } + + // Handle to stop this process from the outside if needed. 
+ public void exit() { + stop = true; + } +} diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/ZooKeeperStorage.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/ZooKeeperStorage.java new file mode 100644 index 0000000..98c65ea --- /dev/null +++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/ZooKeeperStorage.java @@ -0,0 +1,372 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.templeton.tool; + +import java.io.IOException; +import java.io.UnsupportedEncodingException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.zookeeper.CreateMode; +import org.apache.zookeeper.KeeperException; +import org.apache.zookeeper.WatchedEvent; +import org.apache.zookeeper.Watcher; +import org.apache.zookeeper.ZooDefs.Ids; +import org.apache.zookeeper.ZooKeeper; + +/** + * A storage implementation based on storing everything in ZooKeeper. 
+ * This keeps everything in a central location that is guaranteed + * to be available and accessible. + * + * Data is stored with each key/value pair being a node in ZooKeeper. + */ +public class ZooKeeperStorage implements TempletonStorage { + + public static final String TRACKINGDIR = "/created"; + + // Locations for each of the storage types + public String storage_root = null; + public String job_path = null; + public String job_trackingpath = null; + public String overhead_path = null; + + public static final String ZK_HOSTS = "templeton.zookeeper.hosts"; + public static final String ZK_SESSION_TIMEOUT + = "templeton.zookeeper.session-timeout"; + + public static final String ENCODING = "UTF-8"; + + private static final Log LOG = LogFactory.getLog(ZooKeeperStorage.class); + + private ZooKeeper zk; + + /** + * Open a ZooKeeper connection for the JobState. + */ + public static ZooKeeper zkOpen(String zkHosts, int zkSessionTimeout) + throws IOException { + return new ZooKeeper(zkHosts, + zkSessionTimeout, + new Watcher() { + @Override + synchronized public void process(WatchedEvent event) { + } + }); + } + + /** + * Open a ZooKeeper connection for the JobState. + */ + public static ZooKeeper zkOpen(Configuration conf) + throws IOException { + return zkOpen(conf.get(ZK_HOSTS), + conf.getInt(ZK_SESSION_TIMEOUT, 30000)); + } + + public ZooKeeperStorage() { + // No-op -- this is needed to be able to instantiate the + // class from the name. + } + + /** + * Close this ZK connection. 
+ */ + public void close() + throws IOException { + if (zk != null) { + try { + zk.close(); + zk = null; + } catch (InterruptedException e) { + throw new IOException("Closing ZooKeeper connection", e); + } + } + } + + public void startCleanup(Configuration config) { + try { + ZooKeeperCleanup.startInstance(config); + } catch (Exception e) { + LOG.warn("Cleanup instance didn't start."); + } + } + + /** + * Create a node in ZooKeeper + */ + public void create(Type type, String id) + throws IOException { + try { + String[] paths = getPaths(makeZnode(type, id)); + boolean wasCreated = false; + for (String znode : paths) { + try { + zk.create(znode, new byte[0], + Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); + wasCreated = true; + } catch (KeeperException.NodeExistsException e) { + } + } + if (wasCreated) { + try { + // Really not sure if this should go here. Will have + // to see how the storage mechanism evolves. + if (type.equals(Type.JOB)) { + JobStateTracker jt = new JobStateTracker(id, zk, false, + job_trackingpath); + jt.create(); + } + } catch (Exception e) { + LOG.warn("Error tracking: " + e.getMessage()); + // If we couldn't create the tracker node, don't + // create the main node. + zk.delete(makeZnode(type, id), -1); + } + } + if (zk.exists(makeZnode(type, id), false) == null) + throw new IOException("Unable to create " + makeZnode(type, id)); + if (wasCreated) { + try { + saveField(type, id, "created", + Long.toString(System.currentTimeMillis())); + } catch (NotFoundException nfe) { + // Wow, something's really wrong. + throw new IOException("Couldn't write to node " + id, nfe); + } + } + } catch (KeeperException e) { + throw new IOException("Creating " + id, e); + } catch (InterruptedException e) { + throw new IOException("Creating " + id, e); + } + } + + /** + * Get the path based on the job type. 
+ * + * @param type + */ + public String getPath(Type type) { + String typepath = overhead_path; + switch (type) { + case JOB: + typepath = job_path; + break; + case JOBTRACKING: + typepath = job_trackingpath; + break; + } + return typepath; + } + + public static String[] getPaths(String fullpath) { + ArrayList paths = new ArrayList(); + if (fullpath.length() < 2) { + paths.add(fullpath); + } else { + int location = 0; + while ((location = fullpath.indexOf("/", location + 1)) > 0) { + paths.add(fullpath.substring(0, location)); + } + paths.add(fullpath); + } + String[] strings = new String[paths.size()]; + return paths.toArray(strings); + } + + /** + * A helper method that sets a field value. + * @param type + * @param id + * @param name + * @param val + * @throws KeeperException + * @throws UnsupportedEncodingException + * @throws InterruptedException + */ + private void setFieldData(Type type, String id, String name, String val) + throws KeeperException, UnsupportedEncodingException, InterruptedException { + try { + zk.create(makeFieldZnode(type, id, name), + val.getBytes(ENCODING), + Ids.OPEN_ACL_UNSAFE, + CreateMode.PERSISTENT); + } catch (KeeperException.NodeExistsException e) { + zk.setData(makeFieldZnode(type, id, name), + val.getBytes(ENCODING), + -1); + } + } + + /** + * Make a ZK path to the named field. 
+ */ + public String makeFieldZnode(Type type, String id, String name) { + return makeZnode(type, id) + "/" + name; + } + + /** + * Make a ZK path to job + */ + public String makeZnode(Type type, String id) { + return getPath(type) + "/" + id; + } + + @Override + public void saveField(Type type, String id, String key, String val) + throws NotFoundException { + try { + if (val != null) { + create(type, id); + setFieldData(type, id, key, val); + } + } catch (Exception e) { + throw new NotFoundException("Writing " + key + ": " + val + ", " + + e.getMessage()); + } + } + + @Override + public String getField(Type type, String id, String key) { + try { + byte[] b = zk.getData(makeFieldZnode(type, id, key), false, null); + return new String(b, ENCODING); + } catch (Exception e) { + return null; + } + } + + @Override + public Map getFields(Type type, String id) { + HashMap map = new HashMap(); + try { + for (String node : zk.getChildren(makeZnode(type, id), false)) { + byte[] b = zk.getData(makeFieldZnode(type, id, node), + false, null); + map.put(node, new String(b, ENCODING)); + } + } catch (Exception e) { + return map; + } + return map; + } + + @Override + public boolean delete(Type type, String id) throws NotFoundException { + try { + for (String child : zk.getChildren(makeZnode(type, id), false)) { + try { + zk.delete(makeFieldZnode(type, id, child), -1); + } catch (Exception e) { + // Other nodes may be trying to delete this at the same time, + // so just log errors and skip them. + throw new NotFoundException("Couldn't delete " + + makeFieldZnode(type, id, child)); + } + } + try { + zk.delete(makeZnode(type, id), -1); + } catch (Exception e) { + // Same thing -- might be deleted by other nodes, so just go on. 
+ throw new NotFoundException("Couldn't delete " + + makeZnode(type, id)); + } + } catch (Exception e) { + // Error getting children of node -- probably node has been deleted + throw new NotFoundException("Couldn't get children of " + + makeZnode(type, id)); + } + return true; + } + + @Override + public List getAll() { + ArrayList allNodes = new ArrayList(); + for (Type type : Type.values()) { + allNodes.addAll(getAllForType(type)); + } + return allNodes; + } + + @Override + public List getAllForType(Type type) { + try { + return zk.getChildren(getPath(type), false); + } catch (Exception e) { + return new ArrayList(); + } + } + + @Override + public List getAllForKey(String key, String value) { + ArrayList allNodes = new ArrayList(); + try { + for (Type type : Type.values()) { + allNodes.addAll(getAllForTypeAndKey(type, key, value)); + } + } catch (Exception e) { + LOG.info("Couldn't find children."); + } + return allNodes; + } + + @Override + public List getAllForTypeAndKey(Type type, String key, String value) { + ArrayList allNodes = new ArrayList(); + try { + for (String id : zk.getChildren(getPath(type), false)) { + for (String field : zk.getChildren(id, false)) { + if (field.endsWith("/" + key)) { + byte[] b = zk.getData(field, false, null); + if (new String(b, ENCODING).equals(value)) { + allNodes.add(id); + } + } + } + } + } catch (Exception e) { + // Log and go to the next type -- this one might not exist + LOG.info("Couldn't find children of " + getPath(type)); + } + return allNodes; + } + + @Override + public void openStorage(Configuration config) throws IOException { + storage_root = config.get(STORAGE_ROOT); + job_path = storage_root + "/jobs"; + job_trackingpath = storage_root + TRACKINGDIR; + overhead_path = storage_root + "/overhead"; + + if (zk == null) { + zk = zkOpen(config); + } + } + + @Override + public void closeStorage() throws IOException { + close(); + } +} diff --git 
hcatalog/webhcat/svr/src/test/java/org/apache/hcatalog/templeton/TestDesc.java hcatalog/webhcat/svr/src/test/java/org/apache/hcatalog/templeton/TestDesc.java deleted file mode 100644 index 86f3a5d..0000000 --- hcatalog/webhcat/svr/src/test/java/org/apache/hcatalog/templeton/TestDesc.java +++ /dev/null @@ -1,154 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.templeton; - -import java.io.ByteArrayOutputStream; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import junit.framework.TestCase; -import org.codehaus.jackson.map.ObjectMapper; - -/** - * TestDesc - Test the desc objects that are correctly converted to - * and from json. This also sets every field of the TableDesc object. 
- */ -public class TestDesc extends TestCase { - public void testTableDesc() - throws Exception - { - TableDesc td = buildTableDesc(); - assertNotNull(td); - - String json = toJson(td); - assertTrue(json.length() > 100); - - TableDesc tdCopy = (TableDesc) fromJson(json, TableDesc.class); - assertEquals(td, tdCopy); - } - - private TableDesc buildTableDesc() { - TableDesc x = new TableDesc(); - x.group = "staff"; - x.permissions = "755"; - x.external = true; - x.ifNotExists = true; - x.table = "a_table"; - x.comment = "a comment"; - x.columns = buildColumns(); - x.partitionedBy = buildPartitionedBy(); - x.clusteredBy = buildClusterBy(); - x.format = buildStorageFormat(); - x.location = "hdfs://localhost:9000/user/me/a_table"; - x.tableProperties = buildGenericProperties(); - return x; - } - - public List buildColumns() { - ArrayList x = new ArrayList(); - x.add(new ColumnDesc("id", "bigint", null)); - x.add(new ColumnDesc("price", "float", "The unit price")); - x.add(new ColumnDesc("name", "string", "The item name")); - return x; - } - - public List buildPartitionedBy() { - ArrayList x = new ArrayList(); - x.add(new ColumnDesc("country", "string", "The country of origin")); - return x; - } - - public TableDesc.ClusteredByDesc buildClusterBy() { - TableDesc.ClusteredByDesc x = new TableDesc.ClusteredByDesc(); - x.columnNames = new ArrayList(); - x.columnNames.add("id"); - x.sortedBy = buildSortedBy(); - x.numberOfBuckets = 16; - return x; - } - - public List buildSortedBy() { - ArrayList x - = new ArrayList(); - x.add(new TableDesc.ClusterSortOrderDesc("id", TableDesc.SortDirectionDesc.ASC)); - return x; - } - - public TableDesc.StorageFormatDesc buildStorageFormat() { - TableDesc.StorageFormatDesc x = new TableDesc.StorageFormatDesc(); - x.rowFormat = buildRowFormat(); - x.storedAs = "rcfile"; - x.storedBy = buildStoredBy(); - return x; - } - - public TableDesc.RowFormatDesc buildRowFormat() { - TableDesc.RowFormatDesc x = new TableDesc.RowFormatDesc(); - 
x.fieldsTerminatedBy = "\u0001"; - x.collectionItemsTerminatedBy = "\u0002"; - x.mapKeysTerminatedBy = "\u0003"; - x.linesTerminatedBy = "\u0004"; - x.serde = buildSerde(); - return x; - } - - public TableDesc.SerdeDesc buildSerde() { - TableDesc.SerdeDesc x = new TableDesc.SerdeDesc(); - x.name = "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe"; - x.properties = new HashMap(); - x.properties.put("field.delim", ","); - return x; - } - - public TableDesc.StoredByDesc buildStoredBy() { - TableDesc.StoredByDesc x = new TableDesc.StoredByDesc(); - x.className = "org.apache.hadoop.hive.hbase.HBaseStorageHandler"; - x.properties = new HashMap(); - x.properties.put("hbase.columns.mapping", "cf:string"); - x.properties.put("hbase.table.name", "hbase_table_0"); - return x; - } - - public Map buildGenericProperties() { - HashMap x = new HashMap(); - x.put("carmas", "evil"); - x.put("rachel", "better"); - x.put("ctdean", "angelic"); - x.put("paul", "dangerously unbalanced"); - x.put("dra", "organic"); - return x; - } - - private String toJson(Object obj) - throws Exception - { - ObjectMapper mapper = new ObjectMapper(); - ByteArrayOutputStream out = new ByteArrayOutputStream(); - mapper.writeValue(out, obj); - return out.toString(); - } - - private Object fromJson(String json, Class klass) - throws Exception - { - ObjectMapper mapper = new ObjectMapper(); - return mapper.readValue(json, klass); - } -} diff --git hcatalog/webhcat/svr/src/test/java/org/apache/hcatalog/templeton/TestServer.java hcatalog/webhcat/svr/src/test/java/org/apache/hcatalog/templeton/TestServer.java deleted file mode 100644 index 3be3a80..0000000 --- hcatalog/webhcat/svr/src/test/java/org/apache/hcatalog/templeton/TestServer.java +++ /dev/null @@ -1,54 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.templeton; - -import junit.framework.TestCase; - -import org.apache.hcatalog.templeton.mock.MockServer; -import java.util.List; - -/* - * Test that the server code exists, and responds to basic requests. - */ -public class TestServer extends TestCase { - - MockServer server; - - public void setUp() { - new Main(null); // Initialize the config - server = new MockServer(); - } - - public void testServer() { - assertNotNull(server); - } - - public void testStatus() { - assertEquals(server.status().get("status"), "ok"); - } - - public void testVersions() { - assertEquals(server.version().get("version"), "v1"); - } - - public void testFormats() { - assertEquals(1, server.requestFormats().size()); - assertEquals( ((List)server.requestFormats().get("responseTypes")).get(0), "application/json"); - } -} diff --git hcatalog/webhcat/svr/src/test/java/org/apache/hcatalog/templeton/TestWebHCatE2e.java hcatalog/webhcat/svr/src/test/java/org/apache/hcatalog/templeton/TestWebHCatE2e.java deleted file mode 100644 index 997d4ba..0000000 --- hcatalog/webhcat/svr/src/test/java/org/apache/hcatalog/templeton/TestWebHCatE2e.java +++ /dev/null @@ -1,280 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.templeton; - -import junit.framework.Assert; -import org.apache.commons.httpclient.HttpClient; -import org.apache.commons.httpclient.HttpMethod; -import org.apache.commons.httpclient.NameValuePair; -import org.apache.commons.httpclient.methods.DeleteMethod; -import org.apache.commons.httpclient.methods.GetMethod; -import org.apache.commons.httpclient.methods.PutMethod; -import org.apache.commons.httpclient.methods.StringRequestEntity; -import org.apache.hadoop.hive.ql.ErrorMsg; -import org.junit.AfterClass; -import org.junit.BeforeClass; -import org.junit.Ignore; -import org.junit.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.eclipse.jetty.http.HttpStatus; -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -/** - * A set of tests exercising e2e WebHCat DDL APIs. These tests are somewhat - * between WebHCat e2e (hcatalog/src/tests/e2e/templeton) tests and simple58 - * - * unit tests. This will start a WebHCat server and make REST calls to it. - * It doesn't need Hadoop or (standalone) metastore to be running. - * Running this is much simpler than e2e tests. 
- * - * Most of these tests check that HTTP Status code is what is expected and - * Hive Error code {@link org.apache.hadoop.hive.ql.ErrorMsg} is what is - * expected. - * - * It may be possible to extend this to more than just DDL later. - */ -public class TestWebHCatE2e { - private static final Logger LOG = - LoggerFactory.getLogger(TestWebHCatE2e.class); - private static final String templetonBaseUrl = - "http://localhost:50111/templeton/v1"; - private static final String username= "johndoe"; - private static final String ERROR_CODE = "errorCode"; - private static Main templetonServer; - private static final String charSet = "UTF-8"; - @BeforeClass - public static void startHebHcatInMem() { - templetonServer = new Main(new String[] {"-D" + AppConfig.UNIT_TEST_MODE + "=true"}); - LOG.info("Starting Main"); - templetonServer.run(); - LOG.info("Main started"); - } - @AfterClass - public static void stopWebHcatInMem() { - if(templetonServer != null) { - LOG.info("Stopping Main"); - templetonServer.stop(); - LOG.info("Main stopped"); - } - } - @Test - public void getStatus() throws IOException { - LOG.debug("+getStatus()"); - MethodCallRetVal p = doHttpCall(templetonBaseUrl + "/status", HTTP_METHOD_TYPE.GET); - Assert.assertEquals(p.getAssertMsg(), HttpStatus.OK_200, p.httpStatusCode); - Assert.assertEquals(p.getAssertMsg(), "{\"status\":\"ok\",\"version\":\"v1\"}", p.responseBody); - LOG.debug("-getStatus()"); - } - @Ignore("not ready due to HIVE-4824") - @Test - public void listDataBases() throws IOException { - LOG.debug("+listDataBases()"); - MethodCallRetVal p = doHttpCall(templetonBaseUrl + "/ddl/database", HTTP_METHOD_TYPE.GET); - Assert.assertEquals(p.getAssertMsg(), HttpStatus.OK_200, p.httpStatusCode); - Assert.assertEquals(p.getAssertMsg(), "{\"databases\":[\"default\"]}", p.responseBody); - LOG.debug("-listDataBases()"); - } - /** - * Check that we return correct status code when the URL doesn't map to any method - * in {@link Server} - */ - @Test - 
public void invalidPath() throws IOException { - MethodCallRetVal p = doHttpCall(templetonBaseUrl + "/no_such_mapping/database", HTTP_METHOD_TYPE.GET); - Assert.assertEquals(p.getAssertMsg(), HttpStatus.NOT_FOUND_404, p.httpStatusCode); - } - /** - * tries to drop table in a DB that doesn't exist - */ - @Ignore("not ready due to HIVE-4824") - @Test - public void dropTableNoSuchDB() throws IOException { - MethodCallRetVal p = doHttpCall(templetonBaseUrl + - "/ddl/database/no_such_db/table/t1", HTTP_METHOD_TYPE.DELETE); - Assert.assertEquals(p.getAssertMsg(), HttpStatus.NOT_FOUND_404, p.httpStatusCode); - Assert.assertEquals(p.getAssertMsg(), - ErrorMsg.DATABASE_NOT_EXISTS.getErrorCode(), - getErrorCode(p.responseBody)); - } - /** - * tries to drop table in a DB that doesn't exist - */ - @Ignore("not ready due to HIVE-4824") - @Test - public void dropTableNoSuchDbIfExists() throws IOException { - MethodCallRetVal p = doHttpCall(templetonBaseUrl + "/ddl/database/no_such_db/table/t1", - HTTP_METHOD_TYPE.DELETE, null, new NameValuePair[] - {new NameValuePair("ifExists", "true")}); - Assert.assertEquals(p.getAssertMsg(), HttpStatus.NOT_FOUND_404, p.httpStatusCode); - Assert.assertEquals(p.getAssertMsg(), ErrorMsg.DATABASE_NOT_EXISTS.getErrorCode(), getErrorCode(p.responseBody)); - } - /** - * tries to drop table that doesn't exist (with ifExists=true) - */ - @Ignore("not ready due to HIVE-4824") - @Test - public void dropTableIfExists() throws IOException { - MethodCallRetVal p = doHttpCall(templetonBaseUrl + "/ddl/database/default/table/no_such_table", - HTTP_METHOD_TYPE.DELETE, null, new NameValuePair[] - {new NameValuePair("ifExists", "true")}); - Assert.assertEquals(p.getAssertMsg(), HttpStatus.OK_200, p.httpStatusCode); - } - @Ignore("not ready due to HIVE-4824") - @Test - public void createDataBase() throws IOException { - Map props = new HashMap(); - props.put("comment", "Hello, there"); - props.put("location", "file://" + 
System.getProperty("hive.metastore.warehouse.dir")); - Map props2 = new HashMap(); - props2.put("prop", "val"); - props.put("properties", props2); - //{ "comment":"Hello there", "location":"file:///tmp/warehouse", "properties":{"a":"b"}} - MethodCallRetVal p = doHttpCall(templetonBaseUrl + "/ddl/database/newdb", HTTP_METHOD_TYPE.PUT, props, null); - Assert.assertEquals(p.getAssertMsg(), HttpStatus.OK_200, p.httpStatusCode); - } - @Ignore("not ready due to HIVE-4824") - @Test - public void createTable() throws IOException { - //{ "comment":"test", "columns": [ { "name": "col1", "type": "string" } ], "format": { "storedAs": "rcfile" } } - Map props = new HashMap(); - props.put("comment", "Table in default db"); - Map col = new HashMap(); - col.put("name", "col1"); - col.put("type", "string"); - List> colList = new ArrayList>(1); - colList.add(col); - props.put("columns", colList); - Map format = new HashMap(); - format.put("storedAs", "rcfile"); - props.put("format", format); - MethodCallRetVal createTbl = doHttpCall(templetonBaseUrl + "/ddl/database/default/table/test_table", HTTP_METHOD_TYPE.PUT, props, null); - Assert.assertEquals(createTbl.getAssertMsg(), HttpStatus.OK_200, createTbl.httpStatusCode); - LOG.info("createTable() resp: " + createTbl.responseBody); - - MethodCallRetVal descTbl = doHttpCall(templetonBaseUrl + "/ddl/database/default/table/test_table", HTTP_METHOD_TYPE.GET); - Assert.assertEquals(descTbl.getAssertMsg(), HttpStatus.OK_200, descTbl.httpStatusCode); - } - @Ignore("not ready due to HIVE-4824") - @Test - public void describeNoSuchTable() throws IOException { - MethodCallRetVal p = doHttpCall(templetonBaseUrl + - "/ddl/database/default/table/no_such_table", HTTP_METHOD_TYPE.GET); - Assert.assertEquals(p.getAssertMsg(), HttpStatus.NOT_FOUND_404, - p.httpStatusCode); - Assert.assertEquals(p.getAssertMsg(), - ErrorMsg.INVALID_TABLE.getErrorCode(), - getErrorCode(p.responseBody)); - } - /** - * It's expected that Templeton returns a properly 
formatted JSON object when it - * encounters an error. It should have {@code ERROR_CODE} element in it which - * should be the Hive canonical error msg code. - * @return the code or -1 if it cannot be found - */ - private static int getErrorCode(String jsonErrorObject) throws IOException { - @SuppressWarnings("unchecked")//JSON key is always a String - Map retProps = JsonBuilder.jsonToMap(jsonErrorObject + "blah blah"); - int hiveRetCode = -1; - if(retProps.get(ERROR_CODE) !=null) { - hiveRetCode = Integer.parseInt(retProps.get(ERROR_CODE).toString()); - } - return hiveRetCode; - } - /** - * Encapsulates information from HTTP method call - */ - private static class MethodCallRetVal { - private final int httpStatusCode; - private final String responseBody; - private final String submittedURL; - private final String methodName; - private MethodCallRetVal(int httpStatusCode, String responseBody, String submittedURL, String methodName) { - this.httpStatusCode = httpStatusCode; - this.responseBody = responseBody; - this.submittedURL = submittedURL; - this.methodName = methodName; - } - String getAssertMsg() { - return methodName + " " + submittedURL + " " + responseBody; - } - } - private static enum HTTP_METHOD_TYPE {GET, POST, DELETE, PUT} - private static MethodCallRetVal doHttpCall(String uri, HTTP_METHOD_TYPE type) throws IOException { - return doHttpCall(uri, type, null, null); - } - /** - * Does a basic HTTP GET and returns Http Status code + response body - * Will add the dummy user query string - */ - private static MethodCallRetVal doHttpCall(String uri, HTTP_METHOD_TYPE type, Map data, NameValuePair[] params) throws IOException { - HttpClient client = new HttpClient(); - HttpMethod method; - switch (type) { - case GET: - method = new GetMethod(uri); - break; - case DELETE: - method = new DeleteMethod(uri); - break; - case PUT: - method = new PutMethod(uri); - if(data == null) { - break; - } - String msgBody = JsonBuilder.mapToJson(data); - LOG.info("Msg Body: 
" + msgBody); - StringRequestEntity sre = new StringRequestEntity(msgBody, "application/json", charSet); - ((PutMethod)method).setRequestEntity(sre); - break; - default: - throw new IllegalArgumentException("Unsupported method type: " + type); - } - if(params == null) { - method.setQueryString(new NameValuePair[] {new NameValuePair("user.name", username)}); - } - else { - NameValuePair[] newParams = new NameValuePair[params.length + 1]; - System.arraycopy(params, 0, newParams, 1, params.length); - newParams[0] = new NameValuePair("user.name", username); - method.setQueryString(newParams); - } - String actualUri = "no URI"; - try { - actualUri = method.getURI().toString();//should this be escaped string? - LOG.debug(type + ": " + method.getURI().getEscapedURI()); - int httpStatus = client.executeMethod(method); - LOG.debug("Http Status Code=" + httpStatus); - String resp = method.getResponseBodyAsString(); - LOG.debug("response: " + resp); - return new MethodCallRetVal(httpStatus, resp, actualUri, method.getName()); - } - catch (IOException ex) { - LOG.error("doHttpCall() failed", ex); - } - finally { - method.releaseConnection(); - } - return new MethodCallRetVal(-1, "Http " + type + " failed; see log file for details", actualUri, method.getName()); - } -} diff --git hcatalog/webhcat/svr/src/test/java/org/apache/hcatalog/templeton/mock/MockExecService.java hcatalog/webhcat/svr/src/test/java/org/apache/hcatalog/templeton/mock/MockExecService.java deleted file mode 100644 index f231cff..0000000 --- hcatalog/webhcat/svr/src/test/java/org/apache/hcatalog/templeton/mock/MockExecService.java +++ /dev/null @@ -1,49 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.templeton.mock; - -import java.io.IOException; -import java.util.List; -import java.util.Map; - -import org.apache.commons.exec.ExecuteException; -import org.apache.hcatalog.templeton.ExecBean; -import org.apache.hcatalog.templeton.ExecService; -import org.apache.hcatalog.templeton.NotAuthorizedException; - -public class MockExecService implements ExecService { - - public ExecBean run(String program, List args, - Map env) { - ExecBean bean = new ExecBean(); - bean.stdout = program; - bean.stderr = args.toString(); - return bean; - } - - @Override - public ExecBean runUnlimited(String program, - List args, Map env) - throws NotAuthorizedException, ExecuteException, IOException { - ExecBean bean = new ExecBean(); - bean.stdout = program; - bean.stderr = args.toString(); - return null; - } -} diff --git hcatalog/webhcat/svr/src/test/java/org/apache/hcatalog/templeton/mock/MockServer.java hcatalog/webhcat/svr/src/test/java/org/apache/hcatalog/templeton/mock/MockServer.java deleted file mode 100644 index 8c00ea6..0000000 --- hcatalog/webhcat/svr/src/test/java/org/apache/hcatalog/templeton/mock/MockServer.java +++ /dev/null @@ -1,41 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.templeton.mock; - -import org.apache.hcatalog.templeton.Server; - -/* - * Test that the server code exists. - */ -public class MockServer extends Server { - public String user; - - public MockServer() { - execService = new MockExecService(); - resetUser(); - } - - public void resetUser() { - user = System.getenv("USER"); - } - - public String getUser() { - return user; - } -} diff --git hcatalog/webhcat/svr/src/test/java/org/apache/hcatalog/templeton/mock/MockUriInfo.java hcatalog/webhcat/svr/src/test/java/org/apache/hcatalog/templeton/mock/MockUriInfo.java deleted file mode 100644 index ad48aa2..0000000 --- hcatalog/webhcat/svr/src/test/java/org/apache/hcatalog/templeton/mock/MockUriInfo.java +++ /dev/null @@ -1,139 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hcatalog.templeton.mock; - -import java.net.URI; -import java.net.URISyntaxException; -import java.util.List; - -import javax.ws.rs.core.MultivaluedMap; -import javax.ws.rs.core.PathSegment; -import javax.ws.rs.core.UriBuilder; -import javax.ws.rs.core.UriInfo; - -public class MockUriInfo implements UriInfo { - - @Override - public URI getAbsolutePath() { - // TODO Auto-generated method stub - return null; - } - - @Override - public UriBuilder getAbsolutePathBuilder() { - // TODO Auto-generated method stub - return null; - } - - @Override - public URI getBaseUri() { - try { - return new URI("http://fakeuri/templeton"); - } catch (URISyntaxException e) { - e.printStackTrace(); - } - return null; - } - - @Override - public UriBuilder getBaseUriBuilder() { - // TODO Auto-generated method stub - return null; - } - - @Override - public List getMatchedResources() { - // TODO Auto-generated method stub - return null; - } - - @Override - public List getMatchedURIs() { - // TODO Auto-generated method stub - return null; - } - - @Override - public List getMatchedURIs(boolean arg0) { - // TODO Auto-generated method stub - return null; - } - - @Override - public String getPath() { - // TODO Auto-generated method stub - return null; - } - - @Override - public String getPath(boolean arg0) { - // TODO Auto-generated method stub - return null; - } - - @Override - public MultivaluedMap getPathParameters() { - // TODO Auto-generated method stub - return null; - } - - @Override - public MultivaluedMap getPathParameters(boolean arg0) { - // 
TODO Auto-generated method stub - return null; - } - - @Override - public List getPathSegments() { - // TODO Auto-generated method stub - return null; - } - - @Override - public List getPathSegments(boolean arg0) { - // TODO Auto-generated method stub - return null; - } - - @Override - public MultivaluedMap getQueryParameters() { - // TODO Auto-generated method stub - return null; - } - - @Override - public MultivaluedMap getQueryParameters(boolean arg0) { - // TODO Auto-generated method stub - return null; - } - - @Override - public URI getRequestUri() { - // TODO Auto-generated method stub - return null; - } - - @Override - public UriBuilder getRequestUriBuilder() { - // TODO Auto-generated method stub - return null; - } - -} diff --git hcatalog/webhcat/svr/src/test/java/org/apache/hcatalog/templeton/tool/TestTempletonUtils.java hcatalog/webhcat/svr/src/test/java/org/apache/hcatalog/templeton/tool/TestTempletonUtils.java deleted file mode 100644 index d0f6661..0000000 --- hcatalog/webhcat/svr/src/test/java/org/apache/hcatalog/templeton/tool/TestTempletonUtils.java +++ /dev/null @@ -1,248 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.hcatalog.templeton.tool; - -import java.io.File; -import java.io.FileNotFoundException; -import java.io.IOException; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.util.StringUtils; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; - -public class TestTempletonUtils { - public static final String[] CONTROLLER_LINES = { - "2011-12-15 18:12:21,758 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher - More information at: http://localhost:50030/jobdetails.jsp?jobid=job_201112140012_0047", - "2011-12-15 18:12:46,907 [main] INFO org.apache.pig.tools.pigstats.SimplePigStats - Script Statistics: " - }; - public static final String testDataDir = System.getProperty("test.data.dir"); - File tmpFile; - File usrFile; - - @Before - public void setup() { - try { - tmpFile = new File(testDataDir, "tmp"); - tmpFile.createNewFile(); - usrFile = new File(testDataDir, "usr"); - usrFile.createNewFile(); - } catch (IOException ex) { - Assert.fail(ex.getMessage()); - } - } - - @After - public void tearDown() { - tmpFile.delete(); - usrFile.delete(); - } - - @Test - public void testIssetString() { - Assert.assertFalse(TempletonUtils.isset((String)null)); - Assert.assertFalse(TempletonUtils.isset("")); - Assert.assertTrue(TempletonUtils.isset("hello")); - } - - @Test - public void testIssetTArray() { - Assert.assertFalse(TempletonUtils.isset((Long[]) null)); - Assert.assertFalse(TempletonUtils.isset(new String[0])); - String[] parts = new String("hello.world").split("\\."); - Assert.assertTrue(TempletonUtils.isset(parts)); - } - - @Test - public void testPrintTaggedJobID() { - //JobID job = new JobID(); - // TODO -- capture System.out? 
- } - - - @Test - public void testExtractPercentComplete() { - Assert.assertNull(TempletonUtils.extractPercentComplete("fred")); - for (String line : CONTROLLER_LINES) { - Assert.assertNull(TempletonUtils.extractPercentComplete(line)); - } - - String fifty = "2011-12-15 18:12:36,333 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher - 50% complete"; - Assert.assertEquals("50% complete", TempletonUtils.extractPercentComplete(fifty)); - } - - @Test - public void testEncodeArray() { - Assert.assertEquals(null, TempletonUtils.encodeArray((String []) null)); - String[] tmp = new String[0]; - Assert.assertTrue(TempletonUtils.encodeArray(new String[0]).length() == 0); - tmp = new String[3]; - tmp[0] = "fred"; - tmp[1] = null; - tmp[2] = "peter,lisa,, barney"; - Assert.assertEquals("fred,,peter" + - StringUtils.ESCAPE_CHAR + ",lisa" + StringUtils.ESCAPE_CHAR + "," + - StringUtils.ESCAPE_CHAR + ", barney", - TempletonUtils.encodeArray(tmp)); - } - - @Test - public void testDecodeArray() { - Assert.assertTrue(TempletonUtils.encodeArray((String[]) null) == null); - String[] tmp = new String[3]; - tmp[0] = "fred"; - tmp[1] = null; - tmp[2] = "peter,lisa,, barney"; - String[] tmp2 = TempletonUtils.decodeArray(TempletonUtils.encodeArray(tmp)); - try { - for (int i=0; i< tmp.length; i++) { - Assert.assertEquals((String) tmp[i], (String)tmp2[i]); - } - } catch (Exception e) { - Assert.fail("Arrays were not equal" + e.getMessage()); - } - } - - @Test - public void testHadoopFsPath() { - try { - TempletonUtils.hadoopFsPath(null, null, null); - TempletonUtils.hadoopFsPath(tmpFile.toURI().toString(), null, null); - TempletonUtils.hadoopFsPath(tmpFile.toURI().toString(), new Configuration(), null); - } catch (FileNotFoundException e) { - Assert.fail("Couldn't find " + tmpFile.toURI().toString()); - } catch (Exception e) { - // This is our problem -- it means the configuration was wrong. 
- e.printStackTrace(); - } - try { - TempletonUtils.hadoopFsPath("/scoobydoo/teddybear", - new Configuration(), null); - Assert.fail("Should not have found /scoobydoo/teddybear"); - } catch (FileNotFoundException e) { - // Should go here. - } catch (Exception e) { - // This is our problem -- it means the configuration was wrong. - e.printStackTrace(); - } - try { - TempletonUtils.hadoopFsPath("a", new Configuration(), "teddybear"); - Assert.fail("Should not have found /user/teddybear/a"); - } catch (FileNotFoundException e) { - Assert.assertTrue(e.getMessage().contains("/user/teddybear/a")); - } catch (Exception e) { - // This is our problem -- it means the configuration was wrong. - e.printStackTrace(); - Assert.fail("Get wrong exception: " + e.getMessage()); - } - } - - @Test - public void testHadoopFsFilename() { - try { - Assert.assertEquals(null, TempletonUtils.hadoopFsFilename(null, null, null)); - Assert.assertEquals(null, - TempletonUtils.hadoopFsFilename(tmpFile.toURI().toString(), null, null)); - Assert.assertEquals(tmpFile.toURI().toString(), - TempletonUtils.hadoopFsFilename(tmpFile.toURI().toString(), - new Configuration(), - null)); - } catch (FileNotFoundException e) { - Assert.fail("Couldn't find name for /tmp"); - Assert.fail("Couldn't find name for " + tmpFile.toURI().toString()); - } catch (Exception e) { - // Something else is wrong - e.printStackTrace(); - } - try { - TempletonUtils.hadoopFsFilename("/scoobydoo/teddybear", - new Configuration(), null); - Assert.fail("Should not have found /scoobydoo/teddybear"); - } catch (FileNotFoundException e) { - // Should go here. - } catch (Exception e) { - // Something else is wrong. 
- e.printStackTrace(); - } - } - - @Test - public void testHadoopFsListAsArray() { - try { - Assert.assertTrue(TempletonUtils.hadoopFsListAsArray(null, null, null) == null); - Assert.assertTrue(TempletonUtils.hadoopFsListAsArray( - tmpFile.toURI().toString() + "," + usrFile.toString(), null, null) == null); - String[] tmp2 = TempletonUtils.hadoopFsListAsArray( - tmpFile.toURI().toString() + "," + usrFile.toURI().toString(), - new Configuration(), null); - Assert.assertEquals(tmpFile.toURI().toString(), tmp2[0]); - Assert.assertEquals(usrFile.toURI().toString(), tmp2[1]); - } catch (FileNotFoundException e) { - Assert.fail("Couldn't find name for " + tmpFile.toURI().toString()); - } catch (Exception e) { - // Something else is wrong - e.printStackTrace(); - } - try { - TempletonUtils.hadoopFsListAsArray("/scoobydoo/teddybear,joe", - new Configuration(), - null); - Assert.fail("Should not have found /scoobydoo/teddybear"); - } catch (FileNotFoundException e) { - // Should go here. - } catch (Exception e) { - // Something else is wrong. 
- e.printStackTrace(); - } - } - - @Test - public void testHadoopFsListAsString() { - try { - Assert.assertTrue(TempletonUtils.hadoopFsListAsString(null, null, null) == null); - Assert.assertTrue(TempletonUtils.hadoopFsListAsString( - tmpFile.toURI().toString() + "," + usrFile.toURI().toString(), - null, null) == null); - Assert.assertEquals( - tmpFile.toURI().toString() + "," + usrFile.toURI().toString(), - TempletonUtils.hadoopFsListAsString( - tmpFile.toURI().toString() + "," + usrFile.toURI().toString(), - new Configuration(), null)); - } catch (FileNotFoundException e) { - Assert.fail("Couldn't find name for " + tmpFile.toURI().toString()); - } catch (Exception e) { - // Something else is wrong - e.printStackTrace(); - } - try { - TempletonUtils.hadoopFsListAsString("/scoobydoo/teddybear,joe", - new Configuration(), - null); - Assert.fail("Should not have found /scoobydoo/teddybear"); - } catch (FileNotFoundException e) { - // Should go here. - } catch (Exception e) { - // Something else is wrong. - e.printStackTrace(); - } - } - -} diff --git hcatalog/webhcat/svr/src/test/java/org/apache/hcatalog/templeton/tool/TestTrivialExecService.java hcatalog/webhcat/svr/src/test/java/org/apache/hcatalog/templeton/tool/TestTrivialExecService.java deleted file mode 100644 index f8084fd..0000000 --- hcatalog/webhcat/svr/src/test/java/org/apache/hcatalog/templeton/tool/TestTrivialExecService.java +++ /dev/null @@ -1,69 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hcatalog.templeton.tool; - -import java.io.BufferedReader; -import java.io.InputStreamReader; -import java.util.ArrayList; -import java.util.HashMap; - -import org.junit.Assert; -import org.junit.Test; - -public class TestTrivialExecService { - @Test - public void test() { - ArrayList list = new ArrayList(); - list.add("echo"); - list.add("success"); - BufferedReader out = null; - BufferedReader err = null; - try { - Process process = TrivialExecService.getInstance() - .run(list, - new ArrayList(), - new HashMap()); - out = new BufferedReader(new InputStreamReader( - process.getInputStream())); - err = new BufferedReader(new InputStreamReader( - process.getErrorStream())); - Assert.assertEquals("success", out.readLine()); - out.close(); - String line; - while ((line = err.readLine()) != null) { - Assert.fail(line); - } - process.waitFor(); - } catch (Exception e) { - e.printStackTrace(); - Assert.fail("Process caused exception."); - } finally { - try { - out.close(); - } catch (Exception ex) { - // Whatever. 
- } - try { - err.close(); - } catch (Exception ex) { - // Whatever - } - } - } -} diff --git hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/TestDesc.java hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/TestDesc.java new file mode 100644 index 0000000..5e0463d --- /dev/null +++ hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/TestDesc.java @@ -0,0 +1,154 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.templeton; + +import java.io.ByteArrayOutputStream; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import junit.framework.TestCase; +import org.codehaus.jackson.map.ObjectMapper; + +/** + * TestDesc - Test the desc objects that are correctly converted to + * and from json. This also sets every field of the TableDesc object. 
+ */ +public class TestDesc extends TestCase { + public void testTableDesc() + throws Exception + { + TableDesc td = buildTableDesc(); + assertNotNull(td); + + String json = toJson(td); + assertTrue(json.length() > 100); + + TableDesc tdCopy = (TableDesc) fromJson(json, TableDesc.class); + assertEquals(td, tdCopy); + } + + private TableDesc buildTableDesc() { + TableDesc x = new TableDesc(); + x.group = "staff"; + x.permissions = "755"; + x.external = true; + x.ifNotExists = true; + x.table = "a_table"; + x.comment = "a comment"; + x.columns = buildColumns(); + x.partitionedBy = buildPartitionedBy(); + x.clusteredBy = buildClusterBy(); + x.format = buildStorageFormat(); + x.location = "hdfs://localhost:9000/user/me/a_table"; + x.tableProperties = buildGenericProperties(); + return x; + } + + public List buildColumns() { + ArrayList x = new ArrayList(); + x.add(new ColumnDesc("id", "bigint", null)); + x.add(new ColumnDesc("price", "float", "The unit price")); + x.add(new ColumnDesc("name", "string", "The item name")); + return x; + } + + public List buildPartitionedBy() { + ArrayList x = new ArrayList(); + x.add(new ColumnDesc("country", "string", "The country of origin")); + return x; + } + + public TableDesc.ClusteredByDesc buildClusterBy() { + TableDesc.ClusteredByDesc x = new TableDesc.ClusteredByDesc(); + x.columnNames = new ArrayList(); + x.columnNames.add("id"); + x.sortedBy = buildSortedBy(); + x.numberOfBuckets = 16; + return x; + } + + public List buildSortedBy() { + ArrayList x + = new ArrayList(); + x.add(new TableDesc.ClusterSortOrderDesc("id", TableDesc.SortDirectionDesc.ASC)); + return x; + } + + public TableDesc.StorageFormatDesc buildStorageFormat() { + TableDesc.StorageFormatDesc x = new TableDesc.StorageFormatDesc(); + x.rowFormat = buildRowFormat(); + x.storedAs = "rcfile"; + x.storedBy = buildStoredBy(); + return x; + } + + public TableDesc.RowFormatDesc buildRowFormat() { + TableDesc.RowFormatDesc x = new TableDesc.RowFormatDesc(); + 
x.fieldsTerminatedBy = "\u0001"; + x.collectionItemsTerminatedBy = "\u0002"; + x.mapKeysTerminatedBy = "\u0003"; + x.linesTerminatedBy = "\u0004"; + x.serde = buildSerde(); + return x; + } + + public TableDesc.SerdeDesc buildSerde() { + TableDesc.SerdeDesc x = new TableDesc.SerdeDesc(); + x.name = "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe"; + x.properties = new HashMap(); + x.properties.put("field.delim", ","); + return x; + } + + public TableDesc.StoredByDesc buildStoredBy() { + TableDesc.StoredByDesc x = new TableDesc.StoredByDesc(); + x.className = "org.apache.hadoop.hive.hbase.HBaseStorageHandler"; + x.properties = new HashMap(); + x.properties.put("hbase.columns.mapping", "cf:string"); + x.properties.put("hbase.table.name", "hbase_table_0"); + return x; + } + + public Map buildGenericProperties() { + HashMap x = new HashMap(); + x.put("carmas", "evil"); + x.put("rachel", "better"); + x.put("ctdean", "angelic"); + x.put("paul", "dangerously unbalanced"); + x.put("dra", "organic"); + return x; + } + + private String toJson(Object obj) + throws Exception + { + ObjectMapper mapper = new ObjectMapper(); + ByteArrayOutputStream out = new ByteArrayOutputStream(); + mapper.writeValue(out, obj); + return out.toString(); + } + + private Object fromJson(String json, Class klass) + throws Exception + { + ObjectMapper mapper = new ObjectMapper(); + return mapper.readValue(json, klass); + } +} diff --git hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/TestServer.java hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/TestServer.java new file mode 100644 index 0000000..7cf5c35 --- /dev/null +++ hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/TestServer.java @@ -0,0 +1,54 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.templeton; + +import junit.framework.TestCase; + +import org.apache.hive.hcatalog.templeton.mock.MockServer; +import java.util.List; + +/* + * Test that the server code exists, and responds to basic requests. + */ +public class TestServer extends TestCase { + + MockServer server; + + public void setUp() { + new Main(null); // Initialize the config + server = new MockServer(); + } + + public void testServer() { + assertNotNull(server); + } + + public void testStatus() { + assertEquals(server.status().get("status"), "ok"); + } + + public void testVersions() { + assertEquals(server.version().get("version"), "v1"); + } + + public void testFormats() { + assertEquals(1, server.requestFormats().size()); + assertEquals( ((List)server.requestFormats().get("responseTypes")).get(0), "application/json"); + } +} diff --git hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/TestWebHCatE2e.java hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/TestWebHCatE2e.java new file mode 100644 index 0000000..4897ae2 --- /dev/null +++ hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/TestWebHCatE2e.java @@ -0,0 +1,280 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.templeton; + +import junit.framework.Assert; +import org.apache.commons.httpclient.HttpClient; +import org.apache.commons.httpclient.HttpMethod; +import org.apache.commons.httpclient.NameValuePair; +import org.apache.commons.httpclient.methods.DeleteMethod; +import org.apache.commons.httpclient.methods.GetMethod; +import org.apache.commons.httpclient.methods.PutMethod; +import org.apache.commons.httpclient.methods.StringRequestEntity; +import org.apache.hadoop.hive.ql.ErrorMsg; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Ignore; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.eclipse.jetty.http.HttpStatus; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * A set of tests exercising e2e WebHCat DDL APIs. These tests are somewhat + * between WebHCat e2e (hcatalog/src/tests/e2e/templeton) tests and simple + * + * unit tests. This will start a WebHCat server and make REST calls to it. + * It doesn't need Hadoop or (standalone) metastore to be running. + * Running this is much simpler than e2e tests.
+ * + * Most of these tests check that HTTP Status code is what is expected and + * Hive Error code {@link org.apache.hadoop.hive.ql.ErrorMsg} is what is + * expected. + * + * It may be possible to extend this to more than just DDL later. + */ +public class TestWebHCatE2e { + private static final Logger LOG = + LoggerFactory.getLogger(TestWebHCatE2e.class); + private static final String templetonBaseUrl = + "http://localhost:50111/templeton/v1"; + private static final String username= "johndoe"; + private static final String ERROR_CODE = "errorCode"; + private static Main templetonServer; + private static final String charSet = "UTF-8"; + @BeforeClass + public static void startHebHcatInMem() { + templetonServer = new Main(new String[] {"-D" + AppConfig.UNIT_TEST_MODE + "=true"}); + LOG.info("Starting Main"); + templetonServer.run(); + LOG.info("Main started"); + } + @AfterClass + public static void stopWebHcatInMem() { + if(templetonServer != null) { + LOG.info("Stopping Main"); + templetonServer.stop(); + LOG.info("Main stopped"); + } + } + @Test + public void getStatus() throws IOException { + LOG.debug("+getStatus()"); + MethodCallRetVal p = doHttpCall(templetonBaseUrl + "/status", HTTP_METHOD_TYPE.GET); + Assert.assertEquals(p.getAssertMsg(), HttpStatus.OK_200, p.httpStatusCode); + Assert.assertEquals(p.getAssertMsg(), "{\"status\":\"ok\",\"version\":\"v1\"}", p.responseBody); + LOG.debug("-getStatus()"); + } + @Ignore("not ready due to HIVE-4824") + @Test + public void listDataBases() throws IOException { + LOG.debug("+listDataBases()"); + MethodCallRetVal p = doHttpCall(templetonBaseUrl + "/ddl/database", HTTP_METHOD_TYPE.GET); + Assert.assertEquals(p.getAssertMsg(), HttpStatus.OK_200, p.httpStatusCode); + Assert.assertEquals(p.getAssertMsg(), "{\"databases\":[\"default\"]}", p.responseBody); + LOG.debug("-listDataBases()"); + } + /** + * Check that we return correct status code when the URL doesn't map to any method + * in {@link Server} + */ + @Test + 
public void invalidPath() throws IOException { + MethodCallRetVal p = doHttpCall(templetonBaseUrl + "/no_such_mapping/database", HTTP_METHOD_TYPE.GET); + Assert.assertEquals(p.getAssertMsg(), HttpStatus.NOT_FOUND_404, p.httpStatusCode); + } + /** + * tries to drop table in a DB that doesn't exist + */ + @Ignore("not ready due to HIVE-4824") + @Test + public void dropTableNoSuchDB() throws IOException { + MethodCallRetVal p = doHttpCall(templetonBaseUrl + + "/ddl/database/no_such_db/table/t1", HTTP_METHOD_TYPE.DELETE); + Assert.assertEquals(p.getAssertMsg(), HttpStatus.NOT_FOUND_404, p.httpStatusCode); + Assert.assertEquals(p.getAssertMsg(), + ErrorMsg.DATABASE_NOT_EXISTS.getErrorCode(), + getErrorCode(p.responseBody)); + } + /** + * tries to drop table in a DB that doesn't exist + */ + @Ignore("not ready due to HIVE-4824") + @Test + public void dropTableNoSuchDbIfExists() throws IOException { + MethodCallRetVal p = doHttpCall(templetonBaseUrl + "/ddl/database/no_such_db/table/t1", + HTTP_METHOD_TYPE.DELETE, null, new NameValuePair[] + {new NameValuePair("ifExists", "true")}); + Assert.assertEquals(p.getAssertMsg(), HttpStatus.NOT_FOUND_404, p.httpStatusCode); + Assert.assertEquals(p.getAssertMsg(), ErrorMsg.DATABASE_NOT_EXISTS.getErrorCode(), getErrorCode(p.responseBody)); + } + /** + * tries to drop table that doesn't exist (with ifExists=true) + */ + @Ignore("not ready due to HIVE-4824") + @Test + public void dropTableIfExists() throws IOException { + MethodCallRetVal p = doHttpCall(templetonBaseUrl + "/ddl/database/default/table/no_such_table", + HTTP_METHOD_TYPE.DELETE, null, new NameValuePair[] + {new NameValuePair("ifExists", "true")}); + Assert.assertEquals(p.getAssertMsg(), HttpStatus.OK_200, p.httpStatusCode); + } + @Ignore("not ready due to HIVE-4824") + @Test + public void createDataBase() throws IOException { + Map props = new HashMap(); + props.put("comment", "Hello, there"); + props.put("location", "file://" + 
System.getProperty("hive.metastore.warehouse.dir")); + Map props2 = new HashMap(); + props2.put("prop", "val"); + props.put("properties", props2); + //{ "comment":"Hello there", "location":"file:///tmp/warehouse", "properties":{"a":"b"}} + MethodCallRetVal p = doHttpCall(templetonBaseUrl + "/ddl/database/newdb", HTTP_METHOD_TYPE.PUT, props, null); + Assert.assertEquals(p.getAssertMsg(), HttpStatus.OK_200, p.httpStatusCode); + } + @Ignore("not ready due to HIVE-4824") + @Test + public void createTable() throws IOException { + //{ "comment":"test", "columns": [ { "name": "col1", "type": "string" } ], "format": { "storedAs": "rcfile" } } + Map props = new HashMap(); + props.put("comment", "Table in default db"); + Map<String, Object> col = new HashMap<String, Object>(); + col.put("name", "col1"); + col.put("type", "string"); + List<Map<String, Object>> colList = new ArrayList<Map<String, Object>>(1); + colList.add(col); + props.put("columns", colList); + Map format = new HashMap(); + format.put("storedAs", "rcfile"); + props.put("format", format); + MethodCallRetVal createTbl = doHttpCall(templetonBaseUrl + "/ddl/database/default/table/test_table", HTTP_METHOD_TYPE.PUT, props, null); + Assert.assertEquals(createTbl.getAssertMsg(), HttpStatus.OK_200, createTbl.httpStatusCode); + LOG.info("createTable() resp: " + createTbl.responseBody); + + MethodCallRetVal descTbl = doHttpCall(templetonBaseUrl + "/ddl/database/default/table/test_table", HTTP_METHOD_TYPE.GET); + Assert.assertEquals(descTbl.getAssertMsg(), HttpStatus.OK_200, descTbl.httpStatusCode); + } + @Ignore("not ready due to HIVE-4824") + @Test + public void describeNoSuchTable() throws IOException { + MethodCallRetVal p = doHttpCall(templetonBaseUrl + + "/ddl/database/default/table/no_such_table", HTTP_METHOD_TYPE.GET); + Assert.assertEquals(p.getAssertMsg(), HttpStatus.NOT_FOUND_404, + p.httpStatusCode); + Assert.assertEquals(p.getAssertMsg(), + ErrorMsg.INVALID_TABLE.getErrorCode(), + getErrorCode(p.responseBody)); + } + /** + * It's expected that Templeton returns a properly
formatted JSON object when it + * encounters an error. It should have {@code ERROR_CODE} element in it which + * should be the Hive canonical error msg code. + * @return the code or -1 if it cannot be found + */ + private static int getErrorCode(String jsonErrorObject) throws IOException { + @SuppressWarnings("unchecked")//JSON key is always a String + Map retProps = JsonBuilder.jsonToMap(jsonErrorObject); + int hiveRetCode = -1; + if(retProps.get(ERROR_CODE) !=null) { + hiveRetCode = Integer.parseInt(retProps.get(ERROR_CODE).toString()); + } + return hiveRetCode; + } + /** + * Encapsulates information from HTTP method call + */ + private static class MethodCallRetVal { + private final int httpStatusCode; + private final String responseBody; + private final String submittedURL; + private final String methodName; + private MethodCallRetVal(int httpStatusCode, String responseBody, String submittedURL, String methodName) { + this.httpStatusCode = httpStatusCode; + this.responseBody = responseBody; + this.submittedURL = submittedURL; + this.methodName = methodName; + } + String getAssertMsg() { + return methodName + " " + submittedURL + " " + responseBody; + } + } + private static enum HTTP_METHOD_TYPE {GET, POST, DELETE, PUT} + private static MethodCallRetVal doHttpCall(String uri, HTTP_METHOD_TYPE type) throws IOException { + return doHttpCall(uri, type, null, null); + } + /** + * Does a basic HTTP GET and returns Http Status code + response body + * Will add the dummy user query string + */ + private static MethodCallRetVal doHttpCall(String uri, HTTP_METHOD_TYPE type, Map data, NameValuePair[] params) throws IOException { + HttpClient client = new HttpClient(); + HttpMethod method; + switch (type) { + case GET: + method = new GetMethod(uri); + break; + case DELETE: + method = new DeleteMethod(uri); + break; + case PUT: + method = new PutMethod(uri); + if(data == null) { + break; + } + String msgBody = JsonBuilder.mapToJson(data); + LOG.info("Msg Body: 
" + msgBody); + StringRequestEntity sre = new StringRequestEntity(msgBody, "application/json", charSet); + ((PutMethod)method).setRequestEntity(sre); + break; + default: + throw new IllegalArgumentException("Unsupported method type: " + type); + } + if(params == null) { + method.setQueryString(new NameValuePair[] {new NameValuePair("user.name", username)}); + } + else { + NameValuePair[] newParams = new NameValuePair[params.length + 1]; + System.arraycopy(params, 0, newParams, 1, params.length); + newParams[0] = new NameValuePair("user.name", username); + method.setQueryString(newParams); + } + String actualUri = "no URI"; + try { + actualUri = method.getURI().toString();//should this be escaped string? + LOG.debug(type + ": " + method.getURI().getEscapedURI()); + int httpStatus = client.executeMethod(method); + LOG.debug("Http Status Code=" + httpStatus); + String resp = method.getResponseBodyAsString(); + LOG.debug("response: " + resp); + return new MethodCallRetVal(httpStatus, resp, actualUri, method.getName()); + } + catch (IOException ex) { + LOG.error("doHttpCall() failed", ex); + } + finally { + method.releaseConnection(); + } + return new MethodCallRetVal(-1, "Http " + type + " failed; see log file for details", actualUri, method.getName()); + } +} diff --git hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/mock/MockExecService.java hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/mock/MockExecService.java new file mode 100644 index 0000000..90a9551 --- /dev/null +++ hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/mock/MockExecService.java @@ -0,0 +1,49 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.templeton.mock; + +import java.io.IOException; +import java.util.List; +import java.util.Map; + +import org.apache.commons.exec.ExecuteException; +import org.apache.hive.hcatalog.templeton.ExecBean; +import org.apache.hive.hcatalog.templeton.ExecService; +import org.apache.hive.hcatalog.templeton.NotAuthorizedException; + +public class MockExecService implements ExecService { + + public ExecBean run(String program, List args, + Map env) { + ExecBean bean = new ExecBean(); + bean.stdout = program; + bean.stderr = args.toString(); + return bean; + } + + @Override + public ExecBean runUnlimited(String program, + List args, Map env) + throws NotAuthorizedException, ExecuteException, IOException { /* mirrors run(): program -> stdout, args -> stderr */ + ExecBean bean = new ExecBean(); + bean.stdout = program; + bean.stderr = args.toString(); + return bean; + } +} diff --git hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/mock/MockServer.java hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/mock/MockServer.java new file mode 100644 index 0000000..0bee30a --- /dev/null +++ hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/mock/MockServer.java @@ -0,0 +1,41 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements.
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.templeton.mock; + +import org.apache.hive.hcatalog.templeton.Server; + +/* + * Test that the server code exists. + */ +public class MockServer extends Server { + public String user; + + public MockServer() { + execService = new MockExecService(); + resetUser(); + } + + public void resetUser() { + user = System.getenv("USER"); + } + + public String getUser() { + return user; + } +} diff --git hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/mock/MockUriInfo.java hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/mock/MockUriInfo.java new file mode 100644 index 0000000..4ef71b6 --- /dev/null +++ hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/mock/MockUriInfo.java @@ -0,0 +1,139 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hive.hcatalog.templeton.mock; + +import java.net.URI; +import java.net.URISyntaxException; +import java.util.List; + +import javax.ws.rs.core.MultivaluedMap; +import javax.ws.rs.core.PathSegment; +import javax.ws.rs.core.UriBuilder; +import javax.ws.rs.core.UriInfo; + +public class MockUriInfo implements UriInfo { + + @Override + public URI getAbsolutePath() { + // TODO Auto-generated method stub + return null; + } + + @Override + public UriBuilder getAbsolutePathBuilder() { + // TODO Auto-generated method stub + return null; + } + + @Override + public URI getBaseUri() { + try { + return new URI("http://fakeuri/templeton"); + } catch (URISyntaxException e) { + e.printStackTrace(); + } + return null; + } + + @Override + public UriBuilder getBaseUriBuilder() { + // TODO Auto-generated method stub + return null; + } + + @Override + public List getMatchedResources() { + // TODO Auto-generated method stub + return null; + } + + @Override + public List getMatchedURIs() { + // TODO Auto-generated method stub + return null; + } + + @Override + public List getMatchedURIs(boolean arg0) { + // TODO Auto-generated method stub + return null; + } + + @Override + public String getPath() { + // TODO Auto-generated method stub + return null; + } + + @Override + public String getPath(boolean arg0) { + // TODO Auto-generated method stub + return null; + } + + @Override + public MultivaluedMap getPathParameters() { + // TODO Auto-generated method stub + return null; + } + + @Override + public MultivaluedMap getPathParameters(boolean arg0) { 
+ // TODO Auto-generated method stub + return null; + } + + @Override + public List getPathSegments() { + // TODO Auto-generated method stub + return null; + } + + @Override + public List getPathSegments(boolean arg0) { + // TODO Auto-generated method stub + return null; + } + + @Override + public MultivaluedMap getQueryParameters() { + // TODO Auto-generated method stub + return null; + } + + @Override + public MultivaluedMap getQueryParameters(boolean arg0) { + // TODO Auto-generated method stub + return null; + } + + @Override + public URI getRequestUri() { + // TODO Auto-generated method stub + return null; + } + + @Override + public UriBuilder getRequestUriBuilder() { + // TODO Auto-generated method stub + return null; + } + +} diff --git hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/tool/TestTempletonUtils.java hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/tool/TestTempletonUtils.java new file mode 100644 index 0000000..9e4f87a --- /dev/null +++ hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/tool/TestTempletonUtils.java @@ -0,0 +1,248 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.hive.hcatalog.templeton.tool; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.util.StringUtils; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +public class TestTempletonUtils { + public static final String[] CONTROLLER_LINES = { + "2011-12-15 18:12:21,758 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher - More information at: http://localhost:50030/jobdetails.jsp?jobid=job_201112140012_0047", + "2011-12-15 18:12:46,907 [main] INFO org.apache.pig.tools.pigstats.SimplePigStats - Script Statistics: " + }; + public static final String testDataDir = System.getProperty("test.data.dir"); + File tmpFile; + File usrFile; + + @Before + public void setup() { + try { + tmpFile = new File(testDataDir, "tmp"); + tmpFile.createNewFile(); + usrFile = new File(testDataDir, "usr"); + usrFile.createNewFile(); + } catch (IOException ex) { + Assert.fail(ex.getMessage()); + } + } + + @After + public void tearDown() { + tmpFile.delete(); + usrFile.delete(); + } + + @Test + public void testIssetString() { + Assert.assertFalse(TempletonUtils.isset((String)null)); + Assert.assertFalse(TempletonUtils.isset("")); + Assert.assertTrue(TempletonUtils.isset("hello")); + } + + @Test + public void testIssetTArray() { + Assert.assertFalse(TempletonUtils.isset((Long[]) null)); + Assert.assertFalse(TempletonUtils.isset(new String[0])); + String[] parts = new String("hello.world").split("\\."); + Assert.assertTrue(TempletonUtils.isset(parts)); + } + + @Test + public void testPrintTaggedJobID() { + //JobID job = new JobID(); + // TODO -- capture System.out? 
+ } + + + @Test + public void testExtractPercentComplete() { + Assert.assertNull(TempletonUtils.extractPercentComplete("fred")); + for (String line : CONTROLLER_LINES) { + Assert.assertNull(TempletonUtils.extractPercentComplete(line)); + } + + String fifty = "2011-12-15 18:12:36,333 [main] INFO org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher - 50% complete"; + Assert.assertEquals("50% complete", TempletonUtils.extractPercentComplete(fifty)); + } + + @Test + public void testEncodeArray() { + Assert.assertEquals(null, TempletonUtils.encodeArray((String []) null)); + String[] tmp = new String[0]; + Assert.assertTrue(TempletonUtils.encodeArray(new String[0]).length() == 0); + tmp = new String[3]; + tmp[0] = "fred"; + tmp[1] = null; + tmp[2] = "peter,lisa,, barney"; + Assert.assertEquals("fred,,peter" + + StringUtils.ESCAPE_CHAR + ",lisa" + StringUtils.ESCAPE_CHAR + "," + + StringUtils.ESCAPE_CHAR + ", barney", + TempletonUtils.encodeArray(tmp)); + } + + @Test + public void testDecodeArray() { + Assert.assertTrue(TempletonUtils.encodeArray((String[]) null) == null); + String[] tmp = new String[3]; + tmp[0] = "fred"; + tmp[1] = null; + tmp[2] = "peter,lisa,, barney"; + String[] tmp2 = TempletonUtils.decodeArray(TempletonUtils.encodeArray(tmp)); + try { + for (int i=0; i< tmp.length; i++) { + Assert.assertEquals((String) tmp[i], (String)tmp2[i]); + } + } catch (Exception e) { + Assert.fail("Arrays were not equal" + e.getMessage()); + } + } + + @Test + public void testHadoopFsPath() { + try { + TempletonUtils.hadoopFsPath(null, null, null); + TempletonUtils.hadoopFsPath(tmpFile.toURI().toString(), null, null); + TempletonUtils.hadoopFsPath(tmpFile.toURI().toString(), new Configuration(), null); + } catch (FileNotFoundException e) { + Assert.fail("Couldn't find " + tmpFile.toURI().toString()); + } catch (Exception e) { + // This is our problem -- it means the configuration was wrong. 
+ e.printStackTrace(); + } + try { + TempletonUtils.hadoopFsPath("/scoobydoo/teddybear", + new Configuration(), null); + Assert.fail("Should not have found /scoobydoo/teddybear"); + } catch (FileNotFoundException e) { + // Should go here. + } catch (Exception e) { + // This is our problem -- it means the configuration was wrong. + e.printStackTrace(); + } + try { + TempletonUtils.hadoopFsPath("a", new Configuration(), "teddybear"); + Assert.fail("Should not have found /user/teddybear/a"); + } catch (FileNotFoundException e) { + Assert.assertTrue(e.getMessage().contains("/user/teddybear/a")); + } catch (Exception e) { + // This is our problem -- it means the configuration was wrong. + e.printStackTrace(); + Assert.fail("Get wrong exception: " + e.getMessage()); + } + } + + @Test + public void testHadoopFsFilename() { + try { + Assert.assertEquals(null, TempletonUtils.hadoopFsFilename(null, null, null)); + Assert.assertEquals(null, + TempletonUtils.hadoopFsFilename(tmpFile.toURI().toString(), null, null)); + Assert.assertEquals(tmpFile.toURI().toString(), + TempletonUtils.hadoopFsFilename(tmpFile.toURI().toString(), + new Configuration(), + null)); + } catch (FileNotFoundException e) { + // Report the file that was actually looked up (the temp file, not /tmp). + Assert.fail("Couldn't find name for " + tmpFile.toURI().toString()); + } catch (Exception e) { + // Something else is wrong + e.printStackTrace(); + } + try { + TempletonUtils.hadoopFsFilename("/scoobydoo/teddybear", + new Configuration(), null); + Assert.fail("Should not have found /scoobydoo/teddybear"); + } catch (FileNotFoundException e) { + // Should go here. + } catch (Exception e) { + // Something else is wrong.
+ e.printStackTrace(); + } + } + + @Test + public void testHadoopFsListAsArray() { + try { + Assert.assertTrue(TempletonUtils.hadoopFsListAsArray(null, null, null) == null); + Assert.assertTrue(TempletonUtils.hadoopFsListAsArray( + tmpFile.toURI().toString() + "," + usrFile.toString(), null, null) == null); + String[] tmp2 = TempletonUtils.hadoopFsListAsArray( + tmpFile.toURI().toString() + "," + usrFile.toURI().toString(), + new Configuration(), null); + Assert.assertEquals(tmpFile.toURI().toString(), tmp2[0]); + Assert.assertEquals(usrFile.toURI().toString(), tmp2[1]); + } catch (FileNotFoundException e) { + Assert.fail("Couldn't find name for " + tmpFile.toURI().toString()); + } catch (Exception e) { + // Something else is wrong + e.printStackTrace(); + } + try { + TempletonUtils.hadoopFsListAsArray("/scoobydoo/teddybear,joe", + new Configuration(), + null); + Assert.fail("Should not have found /scoobydoo/teddybear"); + } catch (FileNotFoundException e) { + // Should go here. + } catch (Exception e) { + // Something else is wrong. 
+ e.printStackTrace(); + } + } + + @Test + public void testHadoopFsListAsString() { + try { + Assert.assertTrue(TempletonUtils.hadoopFsListAsString(null, null, null) == null); + Assert.assertTrue(TempletonUtils.hadoopFsListAsString( + tmpFile.toURI().toString() + "," + usrFile.toURI().toString(), + null, null) == null); + Assert.assertEquals( + tmpFile.toURI().toString() + "," + usrFile.toURI().toString(), + TempletonUtils.hadoopFsListAsString( + tmpFile.toURI().toString() + "," + usrFile.toURI().toString(), + new Configuration(), null)); + } catch (FileNotFoundException e) { + Assert.fail("Couldn't find name for " + tmpFile.toURI().toString()); + } catch (Exception e) { + // Something else is wrong + e.printStackTrace(); + } + try { + TempletonUtils.hadoopFsListAsString("/scoobydoo/teddybear,joe", + new Configuration(), + null); + Assert.fail("Should not have found /scoobydoo/teddybear"); + } catch (FileNotFoundException e) { + // Should go here. + } catch (Exception e) { + // Something else is wrong. + e.printStackTrace(); + } + } + +} diff --git hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/tool/TestTrivialExecService.java hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/tool/TestTrivialExecService.java new file mode 100644 index 0000000..a9b151c --- /dev/null +++ hcatalog/webhcat/svr/src/test/java/org/apache/hive/hcatalog/templeton/tool/TestTrivialExecService.java @@ -0,0 +1,69 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hive.hcatalog.templeton.tool; + +import java.io.BufferedReader; +import java.io.InputStreamReader; +import java.util.ArrayList; +import java.util.HashMap; + +import org.junit.Assert; +import org.junit.Test; + +public class TestTrivialExecService { + @Test + public void test() { + ArrayList list = new ArrayList(); + list.add("echo"); + list.add("success"); + BufferedReader out = null; + BufferedReader err = null; + try { + Process process = TrivialExecService.getInstance() + .run(list, + new ArrayList(), + new HashMap()); + out = new BufferedReader(new InputStreamReader( + process.getInputStream())); + err = new BufferedReader(new InputStreamReader( + process.getErrorStream())); + Assert.assertEquals("success", out.readLine()); + out.close(); + String line; + while ((line = err.readLine()) != null) { + Assert.fail(line); + } + process.waitFor(); + } catch (Exception e) { + e.printStackTrace(); + Assert.fail("Process caused exception."); + } finally { + try { + out.close(); + } catch (Exception ex) { + // Whatever. + } + try { + err.close(); + } catch (Exception ex) { + // Whatever + } + } + } +}